apify 3.0.0rc1__py3-none-any.whl → 3.0.1b2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of apify might be problematic. Click here for more details.
- apify/_actor.py +150 -117
- apify/_charging.py +19 -0
- apify/_configuration.py +51 -11
- apify/events/__init__.py +2 -2
- apify/storage_clients/__init__.py +2 -0
- apify/storage_clients/_apify/_dataset_client.py +47 -23
- apify/storage_clients/_apify/_key_value_store_client.py +46 -22
- apify/storage_clients/_apify/_models.py +25 -1
- apify/storage_clients/_apify/_request_queue_client.py +188 -648
- apify/storage_clients/_apify/_request_queue_shared_client.py +527 -0
- apify/storage_clients/_apify/_request_queue_single_client.py +399 -0
- apify/storage_clients/_apify/_storage_client.py +55 -29
- apify/storage_clients/_apify/_utils.py +194 -0
- apify/storage_clients/_file_system/_key_value_store_client.py +70 -3
- apify/storage_clients/_file_system/_storage_client.py +7 -1
- apify/storage_clients/_smart_apify/__init__.py +1 -0
- apify/storage_clients/_smart_apify/_storage_client.py +117 -0
- {apify-3.0.0rc1.dist-info → apify-3.0.1b2.dist-info}/METADATA +20 -5
- {apify-3.0.0rc1.dist-info → apify-3.0.1b2.dist-info}/RECORD +21 -16
- {apify-3.0.0rc1.dist-info → apify-3.0.1b2.dist-info}/WHEEL +0 -0
- {apify-3.0.0rc1.dist-info → apify-3.0.1b2.dist-info}/licenses/LICENSE +0 -0
apify/_actor.py
CHANGED
|
@@ -5,6 +5,7 @@ import os
|
|
|
5
5
|
import sys
|
|
6
6
|
from contextlib import suppress
|
|
7
7
|
from datetime import datetime, timedelta, timezone
|
|
8
|
+
from functools import cached_property
|
|
8
9
|
from typing import TYPE_CHECKING, Any, Literal, TypeVar, cast, overload
|
|
9
10
|
|
|
10
11
|
from lazy_object_proxy import Proxy
|
|
@@ -14,6 +15,7 @@ from pydantic import AliasChoices
|
|
|
14
15
|
from apify_client import ApifyClientAsync
|
|
15
16
|
from apify_shared.consts import ActorEnvVars, ActorExitCodes, ApifyEnvVars
|
|
16
17
|
from crawlee import service_locator
|
|
18
|
+
from crawlee.errors import ServiceConflictError
|
|
17
19
|
from crawlee.events import (
|
|
18
20
|
Event,
|
|
19
21
|
EventAbortingData,
|
|
@@ -34,6 +36,8 @@ from apify._utils import docs_group, docs_name, get_system_info, is_running_in_i
|
|
|
34
36
|
from apify.events import ApifyEventManager, EventManager, LocalEventManager
|
|
35
37
|
from apify.log import _configure_logging, logger
|
|
36
38
|
from apify.storage_clients import ApifyStorageClient
|
|
39
|
+
from apify.storage_clients._file_system import ApifyFileSystemStorageClient
|
|
40
|
+
from apify.storage_clients._smart_apify._storage_client import SmartApifyStorageClient
|
|
37
41
|
from apify.storages import Dataset, KeyValueStore, RequestQueue
|
|
38
42
|
|
|
39
43
|
if TYPE_CHECKING:
|
|
@@ -44,7 +48,6 @@ if TYPE_CHECKING:
|
|
|
44
48
|
from typing_extensions import Self
|
|
45
49
|
|
|
46
50
|
from crawlee.proxy_configuration import _NewUrlFunction
|
|
47
|
-
from crawlee.storage_clients import StorageClient
|
|
48
51
|
|
|
49
52
|
from apify._models import Webhook
|
|
50
53
|
|
|
@@ -119,28 +122,14 @@ class _ActorType:
|
|
|
119
122
|
self._exit_process = self._get_default_exit_process() if exit_process is None else exit_process
|
|
120
123
|
self._is_exiting = False
|
|
121
124
|
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
# Create an instance of the cloud storage client, the local storage client is obtained
|
|
127
|
-
# from the service locator.
|
|
128
|
-
self._cloud_storage_client = ApifyStorageClient()
|
|
125
|
+
# Actor state when this method is being executed is unpredictable.
|
|
126
|
+
# Actor can be initialized by lazy object proxy or by user directly, or by both.
|
|
127
|
+
# Until `init` method is run, this state of uncertainty remains. This is the reason why any setting done here in
|
|
128
|
+
# `__init__` method should not be considered final.
|
|
129
129
|
|
|
130
|
-
|
|
131
|
-
self.
|
|
132
|
-
|
|
133
|
-
configuration=self._configuration,
|
|
134
|
-
persist_state_interval=self._configuration.persist_state_interval,
|
|
135
|
-
)
|
|
136
|
-
if self.is_at_home()
|
|
137
|
-
else LocalEventManager(
|
|
138
|
-
system_info_interval=self._configuration.system_info_interval,
|
|
139
|
-
persist_state_interval=self._configuration.persist_state_interval,
|
|
140
|
-
)
|
|
141
|
-
)
|
|
142
|
-
|
|
143
|
-
self._charging_manager = ChargingManagerImplementation(self._configuration, self._apify_client)
|
|
130
|
+
self._configuration = configuration
|
|
131
|
+
self._configure_logging = configure_logging
|
|
132
|
+
self._apify_client: ApifyClientAsync | None = None
|
|
144
133
|
|
|
145
134
|
self._is_initialized = False
|
|
146
135
|
|
|
@@ -198,46 +187,87 @@ class _ActorType:
|
|
|
198
187
|
@property
|
|
199
188
|
def apify_client(self) -> ApifyClientAsync:
|
|
200
189
|
"""The ApifyClientAsync instance the Actor instance uses."""
|
|
190
|
+
if not self._apify_client:
|
|
191
|
+
self._apify_client = self.new_client()
|
|
201
192
|
return self._apify_client
|
|
202
193
|
|
|
203
|
-
@
|
|
194
|
+
@cached_property
|
|
204
195
|
def configuration(self) -> Configuration:
|
|
205
196
|
"""The Configuration instance the Actor instance uses."""
|
|
206
|
-
|
|
197
|
+
if self._configuration:
|
|
198
|
+
return self._configuration
|
|
199
|
+
|
|
200
|
+
try:
|
|
201
|
+
# Set implicit default Apify configuration, unless configuration was already set.
|
|
202
|
+
implicit_configuration = Configuration()
|
|
203
|
+
service_locator.set_configuration(implicit_configuration)
|
|
204
|
+
self._configuration = implicit_configuration
|
|
205
|
+
except ServiceConflictError:
|
|
206
|
+
self.log.debug(
|
|
207
|
+
'Configuration in service locator was set explicitly before Actor.init was called.'
|
|
208
|
+
'Using the existing configuration as implicit configuration for the Actor.'
|
|
209
|
+
)
|
|
207
210
|
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
"""The Configuration instance the Actor instance uses."""
|
|
211
|
+
# Use the configuration from the service locator
|
|
212
|
+
self._configuration = Configuration.get_global_configuration()
|
|
211
213
|
return self._configuration
|
|
212
214
|
|
|
213
|
-
@
|
|
215
|
+
@cached_property
|
|
214
216
|
def event_manager(self) -> EventManager:
|
|
215
217
|
"""The EventManager instance the Actor instance uses."""
|
|
216
|
-
return
|
|
218
|
+
return (
|
|
219
|
+
ApifyEventManager(
|
|
220
|
+
configuration=self.configuration,
|
|
221
|
+
persist_state_interval=self.configuration.persist_state_interval,
|
|
222
|
+
)
|
|
223
|
+
if self.is_at_home()
|
|
224
|
+
else LocalEventManager(
|
|
225
|
+
system_info_interval=self.configuration.system_info_interval,
|
|
226
|
+
persist_state_interval=self.configuration.persist_state_interval,
|
|
227
|
+
)
|
|
228
|
+
)
|
|
217
229
|
|
|
218
230
|
@property
|
|
219
231
|
def log(self) -> logging.Logger:
|
|
220
232
|
"""The logging.Logger instance the Actor uses."""
|
|
221
233
|
return logger
|
|
222
234
|
|
|
223
|
-
@property
|
|
224
|
-
def _local_storage_client(self) -> StorageClient:
|
|
225
|
-
"""The local storage client the Actor instance uses."""
|
|
226
|
-
return service_locator.get_storage_client()
|
|
227
|
-
|
|
228
235
|
def _raise_if_not_initialized(self) -> None:
|
|
229
236
|
if not self._is_initialized:
|
|
230
237
|
raise RuntimeError('The Actor was not initialized!')
|
|
231
238
|
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
239
|
+
@cached_property
|
|
240
|
+
def _storage_client(self) -> SmartApifyStorageClient:
|
|
241
|
+
"""Storage client used by the actor.
|
|
235
242
|
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
243
|
+
Depending on the initialization of the service locator the client can be created in different ways.
|
|
244
|
+
"""
|
|
245
|
+
try:
|
|
246
|
+
# Nothing was set by the user.
|
|
247
|
+
implicit_storage_client = SmartApifyStorageClient(
|
|
248
|
+
local_storage_client=ApifyFileSystemStorageClient(), cloud_storage_client=ApifyStorageClient()
|
|
240
249
|
)
|
|
250
|
+
service_locator.set_storage_client(implicit_storage_client)
|
|
251
|
+
except ServiceConflictError:
|
|
252
|
+
self.log.debug(
|
|
253
|
+
'Storage client in service locator was set explicitly before Actor.init was called.'
|
|
254
|
+
'Using the existing storage client as implicit storage client for the Actor.'
|
|
255
|
+
)
|
|
256
|
+
else:
|
|
257
|
+
return implicit_storage_client
|
|
258
|
+
|
|
259
|
+
# User set something in the service locator.
|
|
260
|
+
explicit_storage_client = service_locator.get_storage_client()
|
|
261
|
+
if isinstance(explicit_storage_client, SmartApifyStorageClient):
|
|
262
|
+
# The client was manually set to the right type in the service locator. This is the explicit way.
|
|
263
|
+
return explicit_storage_client
|
|
264
|
+
|
|
265
|
+
raise RuntimeError(
|
|
266
|
+
'The storage client in the service locator has to be instance of SmartApifyStorageClient. If you want to '
|
|
267
|
+
'set the storage client manually you have to call '
|
|
268
|
+
'`service_locator.set_storage_client(SmartApifyStorageClient(...))` before entering Actor context or '
|
|
269
|
+
'awaiting `Actor.init`.'
|
|
270
|
+
)
|
|
241
271
|
|
|
242
272
|
async def init(self) -> None:
|
|
243
273
|
"""Initialize the Actor instance.
|
|
@@ -250,6 +280,14 @@ class _ActorType:
|
|
|
250
280
|
This method should be called immediately before performing any additional Actor actions, and it should be
|
|
251
281
|
called only once.
|
|
252
282
|
"""
|
|
283
|
+
self.log.info('Initializing Actor...')
|
|
284
|
+
if self._configuration:
|
|
285
|
+
# Set explicitly the configuration in the service locator
|
|
286
|
+
service_locator.set_configuration(self.configuration)
|
|
287
|
+
else:
|
|
288
|
+
# Ensure that the configuration (cached property) is set
|
|
289
|
+
_ = self.configuration
|
|
290
|
+
|
|
253
291
|
if self._is_initialized:
|
|
254
292
|
raise RuntimeError('The Actor was already initialized!')
|
|
255
293
|
|
|
@@ -262,27 +300,20 @@ class _ActorType:
|
|
|
262
300
|
self._is_exiting = False
|
|
263
301
|
self._was_final_persist_state_emitted = False
|
|
264
302
|
|
|
265
|
-
|
|
266
|
-
if self.is_at_home():
|
|
267
|
-
service_locator.set_storage_client(self._cloud_storage_client)
|
|
303
|
+
self.log.debug(f'Storage client set to {self._storage_client}')
|
|
268
304
|
|
|
269
305
|
service_locator.set_event_manager(self.event_manager)
|
|
270
|
-
service_locator.set_configuration(self.configuration)
|
|
271
306
|
|
|
272
307
|
# The logging configuration has to be called after all service_locator set methods.
|
|
273
308
|
if self._configure_logging:
|
|
274
309
|
_configure_logging()
|
|
275
310
|
|
|
276
|
-
self.log.info('Initializing Actor...')
|
|
277
311
|
self.log.info('System info', extra=get_system_info())
|
|
278
312
|
|
|
279
|
-
|
|
280
|
-
# https://github.com/apify/apify-sdk-python/issues/146
|
|
281
|
-
|
|
282
|
-
await self._event_manager.__aenter__()
|
|
313
|
+
await self.event_manager.__aenter__()
|
|
283
314
|
self.log.debug('Event manager initialized')
|
|
284
315
|
|
|
285
|
-
await self.
|
|
316
|
+
await self._charging_manager_implementation.__aenter__()
|
|
286
317
|
self.log.debug('Charging manager initialized')
|
|
287
318
|
|
|
288
319
|
self._is_initialized = True
|
|
@@ -323,10 +354,10 @@ class _ActorType:
|
|
|
323
354
|
await asyncio.sleep(0.1)
|
|
324
355
|
|
|
325
356
|
if event_listeners_timeout:
|
|
326
|
-
await self.
|
|
357
|
+
await self.event_manager.wait_for_all_listeners_to_complete(timeout=event_listeners_timeout)
|
|
327
358
|
|
|
328
|
-
await self.
|
|
329
|
-
await self.
|
|
359
|
+
await self.event_manager.__aexit__(None, None, None)
|
|
360
|
+
await self._charging_manager_implementation.__aexit__(None, None, None)
|
|
330
361
|
|
|
331
362
|
await asyncio.wait_for(finalize(), cleanup_timeout.total_seconds())
|
|
332
363
|
self._is_initialized = False
|
|
@@ -385,8 +416,8 @@ class _ActorType:
|
|
|
385
416
|
(increases exponentially from this value).
|
|
386
417
|
timeout: The socket timeout of the HTTP requests sent to the Apify API.
|
|
387
418
|
"""
|
|
388
|
-
token = token or self.
|
|
389
|
-
api_url = api_url or self.
|
|
419
|
+
token = token or self.configuration.token
|
|
420
|
+
api_url = api_url or self.configuration.api_base_url
|
|
390
421
|
return ApifyClientAsync(
|
|
391
422
|
token=token,
|
|
392
423
|
api_url=api_url,
|
|
@@ -401,6 +432,7 @@ class _ActorType:
|
|
|
401
432
|
self,
|
|
402
433
|
*,
|
|
403
434
|
id: str | None = None,
|
|
435
|
+
alias: str | None = None,
|
|
404
436
|
name: str | None = None,
|
|
405
437
|
force_cloud: bool = False,
|
|
406
438
|
) -> Dataset:
|
|
@@ -411,10 +443,12 @@ class _ActorType:
|
|
|
411
443
|
the Apify cloud.
|
|
412
444
|
|
|
413
445
|
Args:
|
|
414
|
-
id: ID of the dataset to
|
|
415
|
-
|
|
416
|
-
name:
|
|
417
|
-
|
|
446
|
+
id: The ID of the dataset to open. If provided, searches for existing dataset by ID.
|
|
447
|
+
Mutually exclusive with name and alias.
|
|
448
|
+
name: The name of the dataset to open (global scope, persists across runs).
|
|
449
|
+
Mutually exclusive with id and alias.
|
|
450
|
+
alias: The alias of the dataset to open (run scope, creates unnamed storage).
|
|
451
|
+
Mutually exclusive with id and name.
|
|
418
452
|
force_cloud: If set to `True` then the Apify cloud storage is always used. This way it is possible
|
|
419
453
|
to combine local and cloud storage.
|
|
420
454
|
|
|
@@ -422,21 +456,18 @@ class _ActorType:
|
|
|
422
456
|
An instance of the `Dataset` class for the given ID or name.
|
|
423
457
|
"""
|
|
424
458
|
self._raise_if_not_initialized()
|
|
425
|
-
self._raise_if_cloud_requested_but_not_configured(force_cloud=force_cloud)
|
|
426
|
-
|
|
427
|
-
storage_client = self._cloud_storage_client if force_cloud else self._local_storage_client
|
|
428
|
-
|
|
429
459
|
return await Dataset.open(
|
|
430
460
|
id=id,
|
|
431
461
|
name=name,
|
|
432
|
-
|
|
433
|
-
storage_client=
|
|
462
|
+
alias=alias,
|
|
463
|
+
storage_client=self._storage_client.get_suitable_storage_client(force_cloud=force_cloud),
|
|
434
464
|
)
|
|
435
465
|
|
|
436
466
|
async def open_key_value_store(
|
|
437
467
|
self,
|
|
438
468
|
*,
|
|
439
469
|
id: str | None = None,
|
|
470
|
+
alias: str | None = None,
|
|
440
471
|
name: str | None = None,
|
|
441
472
|
force_cloud: bool = False,
|
|
442
473
|
) -> KeyValueStore:
|
|
@@ -446,10 +477,12 @@ class _ActorType:
|
|
|
446
477
|
and retrieved using a unique key. The actual data is stored either on a local filesystem or in the Apify cloud.
|
|
447
478
|
|
|
448
479
|
Args:
|
|
449
|
-
id: ID of the
|
|
450
|
-
|
|
451
|
-
name:
|
|
452
|
-
|
|
480
|
+
id: The ID of the KVS to open. If provided, searches for existing KVS by ID.
|
|
481
|
+
Mutually exclusive with name and alias.
|
|
482
|
+
name: The name of the KVS to open (global scope, persists across runs).
|
|
483
|
+
Mutually exclusive with id and alias.
|
|
484
|
+
alias: The alias of the KVS to open (run scope, creates unnamed storage).
|
|
485
|
+
Mutually exclusive with id and name.
|
|
453
486
|
force_cloud: If set to `True` then the Apify cloud storage is always used. This way it is possible
|
|
454
487
|
to combine local and cloud storage.
|
|
455
488
|
|
|
@@ -457,20 +490,18 @@ class _ActorType:
|
|
|
457
490
|
An instance of the `KeyValueStore` class for the given ID or name.
|
|
458
491
|
"""
|
|
459
492
|
self._raise_if_not_initialized()
|
|
460
|
-
self._raise_if_cloud_requested_but_not_configured(force_cloud=force_cloud)
|
|
461
|
-
storage_client = self._cloud_storage_client if force_cloud else self._local_storage_client
|
|
462
|
-
|
|
463
493
|
return await KeyValueStore.open(
|
|
464
494
|
id=id,
|
|
465
495
|
name=name,
|
|
466
|
-
|
|
467
|
-
storage_client=
|
|
496
|
+
alias=alias,
|
|
497
|
+
storage_client=self._storage_client.get_suitable_storage_client(force_cloud=force_cloud),
|
|
468
498
|
)
|
|
469
499
|
|
|
470
500
|
async def open_request_queue(
|
|
471
501
|
self,
|
|
472
502
|
*,
|
|
473
503
|
id: str | None = None,
|
|
504
|
+
alias: str | None = None,
|
|
474
505
|
name: str | None = None,
|
|
475
506
|
force_cloud: bool = False,
|
|
476
507
|
) -> RequestQueue:
|
|
@@ -482,10 +513,12 @@ class _ActorType:
|
|
|
482
513
|
crawling orders.
|
|
483
514
|
|
|
484
515
|
Args:
|
|
485
|
-
id: ID of the
|
|
486
|
-
|
|
487
|
-
name:
|
|
488
|
-
|
|
516
|
+
id: The ID of the RQ to open. If provided, searches for existing RQ by ID.
|
|
517
|
+
Mutually exclusive with name and alias.
|
|
518
|
+
name: The name of the RQ to open (global scope, persists across runs).
|
|
519
|
+
Mutually exclusive with id and alias.
|
|
520
|
+
alias: The alias of the RQ to open (run scope, creates unnamed storage).
|
|
521
|
+
Mutually exclusive with id and name.
|
|
489
522
|
force_cloud: If set to `True` then the Apify cloud storage is always used. This way it is possible
|
|
490
523
|
to combine local and cloud storage.
|
|
491
524
|
|
|
@@ -493,15 +526,11 @@ class _ActorType:
|
|
|
493
526
|
An instance of the `RequestQueue` class for the given ID or name.
|
|
494
527
|
"""
|
|
495
528
|
self._raise_if_not_initialized()
|
|
496
|
-
self._raise_if_cloud_requested_but_not_configured(force_cloud=force_cloud)
|
|
497
|
-
|
|
498
|
-
storage_client = self._cloud_storage_client if force_cloud else self._local_storage_client
|
|
499
|
-
|
|
500
529
|
return await RequestQueue.open(
|
|
501
530
|
id=id,
|
|
502
531
|
name=name,
|
|
503
|
-
|
|
504
|
-
storage_client=
|
|
532
|
+
alias=alias,
|
|
533
|
+
storage_client=self._storage_client.get_suitable_storage_client(force_cloud=force_cloud),
|
|
505
534
|
)
|
|
506
535
|
|
|
507
536
|
@overload
|
|
@@ -524,7 +553,7 @@ class _ActorType:
|
|
|
524
553
|
data = data if isinstance(data, list) else [data]
|
|
525
554
|
|
|
526
555
|
max_charged_count = (
|
|
527
|
-
self.
|
|
556
|
+
self.get_charging_manager().calculate_max_event_charge_count_within_limit(charged_event_name)
|
|
528
557
|
if charged_event_name is not None
|
|
529
558
|
else None
|
|
530
559
|
)
|
|
@@ -538,7 +567,7 @@ class _ActorType:
|
|
|
538
567
|
await dataset.push_data(data)
|
|
539
568
|
|
|
540
569
|
if charged_event_name:
|
|
541
|
-
return await self.
|
|
570
|
+
return await self.get_charging_manager().charge(
|
|
542
571
|
event_name=charged_event_name,
|
|
543
572
|
count=min(max_charged_count, len(data)) if max_charged_count is not None else len(data),
|
|
544
573
|
)
|
|
@@ -549,9 +578,9 @@ class _ActorType:
|
|
|
549
578
|
"""Get the Actor input value from the default key-value store associated with the current Actor run."""
|
|
550
579
|
self._raise_if_not_initialized()
|
|
551
580
|
|
|
552
|
-
input_value = await self.get_value(self.
|
|
553
|
-
input_secrets_private_key = self.
|
|
554
|
-
input_secrets_key_passphrase = self.
|
|
581
|
+
input_value = await self.get_value(self.configuration.input_key)
|
|
582
|
+
input_secrets_private_key = self.configuration.input_secrets_private_key_file
|
|
583
|
+
input_secrets_key_passphrase = self.configuration.input_secrets_private_key_passphrase
|
|
555
584
|
if input_secrets_private_key and input_secrets_key_passphrase:
|
|
556
585
|
private_key = load_private_key(
|
|
557
586
|
input_secrets_private_key,
|
|
@@ -595,7 +624,11 @@ class _ActorType:
|
|
|
595
624
|
def get_charging_manager(self) -> ChargingManager:
|
|
596
625
|
"""Retrieve the charging manager to access granular pricing information."""
|
|
597
626
|
self._raise_if_not_initialized()
|
|
598
|
-
return self.
|
|
627
|
+
return self._charging_manager_implementation
|
|
628
|
+
|
|
629
|
+
@cached_property
|
|
630
|
+
def _charging_manager_implementation(self) -> ChargingManagerImplementation:
|
|
631
|
+
return ChargingManagerImplementation(self.configuration, self.apify_client)
|
|
599
632
|
|
|
600
633
|
async def charge(self, event_name: str, count: int = 1) -> ChargeResult:
|
|
601
634
|
"""Charge for a specified number of events - sub-operations of the Actor.
|
|
@@ -607,7 +640,7 @@ class _ActorType:
|
|
|
607
640
|
count: Number of events to charge for.
|
|
608
641
|
"""
|
|
609
642
|
self._raise_if_not_initialized()
|
|
610
|
-
return await self.
|
|
643
|
+
return await self.get_charging_manager().charge(event_name, count)
|
|
611
644
|
|
|
612
645
|
@overload
|
|
613
646
|
def on(
|
|
@@ -658,7 +691,7 @@ class _ActorType:
|
|
|
658
691
|
"""
|
|
659
692
|
self._raise_if_not_initialized()
|
|
660
693
|
|
|
661
|
-
self.
|
|
694
|
+
self.event_manager.on(event=event_name, listener=listener)
|
|
662
695
|
return listener
|
|
663
696
|
|
|
664
697
|
@overload
|
|
@@ -684,11 +717,11 @@ class _ActorType:
|
|
|
684
717
|
"""
|
|
685
718
|
self._raise_if_not_initialized()
|
|
686
719
|
|
|
687
|
-
self.
|
|
720
|
+
self.event_manager.off(event=event_name, listener=listener)
|
|
688
721
|
|
|
689
722
|
def is_at_home(self) -> bool:
|
|
690
723
|
"""Return `True` when the Actor is running on the Apify platform, and `False` otherwise (e.g. local run)."""
|
|
691
|
-
return self.
|
|
724
|
+
return self.configuration.is_at_home
|
|
692
725
|
|
|
693
726
|
def get_env(self) -> dict:
|
|
694
727
|
"""Return a dictionary with information parsed from all the `APIFY_XXX` environment variables.
|
|
@@ -714,7 +747,7 @@ class _ActorType:
|
|
|
714
747
|
aliases = [field_name]
|
|
715
748
|
|
|
716
749
|
for alias in aliases:
|
|
717
|
-
config[alias] = getattr(self.
|
|
750
|
+
config[alias] = getattr(self.configuration, field_name)
|
|
718
751
|
|
|
719
752
|
env_vars = {env_var.value.lower(): env_var.name.lower() for env_var in [*ActorEnvVars, *ApifyEnvVars]}
|
|
720
753
|
return {option_name: config[env_var] for env_var, option_name in env_vars.items() if env_var in config}
|
|
@@ -759,7 +792,7 @@ class _ActorType:
|
|
|
759
792
|
"""
|
|
760
793
|
self._raise_if_not_initialized()
|
|
761
794
|
|
|
762
|
-
client = self.new_client(token=token) if token else self.
|
|
795
|
+
client = self.new_client(token=token) if token else self.apify_client
|
|
763
796
|
|
|
764
797
|
if webhooks:
|
|
765
798
|
serialized_webhooks = [
|
|
@@ -790,7 +823,7 @@ class _ActorType:
|
|
|
790
823
|
return ActorRun.model_validate(api_result)
|
|
791
824
|
|
|
792
825
|
def _get_remaining_time(self) -> timedelta | None:
|
|
793
|
-
"""Get time remaining from the
|
|
826
|
+
"""Get time remaining from the Actor timeout. Returns `None` if not on an Apify platform."""
|
|
794
827
|
if self.is_at_home() and self.configuration.timeout_at:
|
|
795
828
|
return self.configuration.timeout_at - datetime.now(tz=timezone.utc)
|
|
796
829
|
|
|
@@ -826,7 +859,7 @@ class _ActorType:
|
|
|
826
859
|
"""
|
|
827
860
|
self._raise_if_not_initialized()
|
|
828
861
|
|
|
829
|
-
client = self.new_client(token=token) if token else self.
|
|
862
|
+
client = self.new_client(token=token) if token else self.apify_client
|
|
830
863
|
|
|
831
864
|
if status_message:
|
|
832
865
|
await client.run(run_id).update(status_message=status_message)
|
|
@@ -879,7 +912,7 @@ class _ActorType:
|
|
|
879
912
|
"""
|
|
880
913
|
self._raise_if_not_initialized()
|
|
881
914
|
|
|
882
|
-
client = self.new_client(token=token) if token else self.
|
|
915
|
+
client = self.new_client(token=token) if token else self.apify_client
|
|
883
916
|
|
|
884
917
|
if webhooks:
|
|
885
918
|
serialized_webhooks = [
|
|
@@ -951,7 +984,7 @@ class _ActorType:
|
|
|
951
984
|
"""
|
|
952
985
|
self._raise_if_not_initialized()
|
|
953
986
|
|
|
954
|
-
client = self.new_client(token=token) if token else self.
|
|
987
|
+
client = self.new_client(token=token) if token else self.apify_client
|
|
955
988
|
|
|
956
989
|
if webhooks:
|
|
957
990
|
serialized_webhooks = [
|
|
@@ -1002,13 +1035,13 @@ class _ActorType:
|
|
|
1002
1035
|
return
|
|
1003
1036
|
|
|
1004
1037
|
if not custom_after_sleep:
|
|
1005
|
-
custom_after_sleep = self.
|
|
1038
|
+
custom_after_sleep = self.configuration.metamorph_after_sleep
|
|
1006
1039
|
|
|
1007
|
-
# If is_at_home() is True,
|
|
1008
|
-
if not self.
|
|
1040
|
+
# If is_at_home() is True, configuration.actor_run_id is always set
|
|
1041
|
+
if not self.configuration.actor_run_id:
|
|
1009
1042
|
raise RuntimeError('actor_run_id cannot be None when running on the Apify platform.')
|
|
1010
1043
|
|
|
1011
|
-
await self.
|
|
1044
|
+
await self.apify_client.run(self.configuration.actor_run_id).metamorph(
|
|
1012
1045
|
target_actor_id=target_actor_id,
|
|
1013
1046
|
run_input=run_input,
|
|
1014
1047
|
target_actor_build=target_actor_build,
|
|
@@ -1045,7 +1078,7 @@ class _ActorType:
|
|
|
1045
1078
|
_ActorType._is_rebooting = True
|
|
1046
1079
|
|
|
1047
1080
|
if not custom_after_sleep:
|
|
1048
|
-
custom_after_sleep = self.
|
|
1081
|
+
custom_after_sleep = self.configuration.metamorph_after_sleep
|
|
1049
1082
|
|
|
1050
1083
|
# Call all the listeners for the PERSIST_STATE and MIGRATING events, and wait for them to finish.
|
|
1051
1084
|
# PERSIST_STATE listeners are called to allow the Actor to persist its state before the reboot.
|
|
@@ -1054,10 +1087,10 @@ class _ActorType:
|
|
|
1054
1087
|
# We can't just emit the events and wait for all listeners to finish,
|
|
1055
1088
|
# because this method might be called from an event listener itself, and we would deadlock.
|
|
1056
1089
|
persist_state_listeners = flatten(
|
|
1057
|
-
(self.
|
|
1090
|
+
(self.event_manager._listeners_to_wrappers[Event.PERSIST_STATE] or {}).values() # noqa: SLF001
|
|
1058
1091
|
)
|
|
1059
1092
|
migrating_listeners = flatten(
|
|
1060
|
-
(self.
|
|
1093
|
+
(self.event_manager._listeners_to_wrappers[Event.MIGRATING] or {}).values() # noqa: SLF001
|
|
1061
1094
|
)
|
|
1062
1095
|
|
|
1063
1096
|
await asyncio.gather(
|
|
@@ -1065,10 +1098,10 @@ class _ActorType:
|
|
|
1065
1098
|
*[listener(EventMigratingData()) for listener in migrating_listeners],
|
|
1066
1099
|
)
|
|
1067
1100
|
|
|
1068
|
-
if not self.
|
|
1101
|
+
if not self.configuration.actor_run_id:
|
|
1069
1102
|
raise RuntimeError('actor_run_id cannot be None when running on the Apify platform.')
|
|
1070
1103
|
|
|
1071
|
-
await self.
|
|
1104
|
+
await self.apify_client.run(self.configuration.actor_run_id).reboot()
|
|
1072
1105
|
|
|
1073
1106
|
if custom_after_sleep:
|
|
1074
1107
|
await asyncio.sleep(custom_after_sleep.total_seconds())
|
|
@@ -1107,11 +1140,11 @@ class _ActorType:
|
|
|
1107
1140
|
return
|
|
1108
1141
|
|
|
1109
1142
|
# If is_at_home() is True, config.actor_run_id is always set
|
|
1110
|
-
if not self.
|
|
1143
|
+
if not self.configuration.actor_run_id:
|
|
1111
1144
|
raise RuntimeError('actor_run_id cannot be None when running on the Apify platform.')
|
|
1112
1145
|
|
|
1113
|
-
await self.
|
|
1114
|
-
actor_run_id=self.
|
|
1146
|
+
await self.apify_client.webhooks().create(
|
|
1147
|
+
actor_run_id=self.configuration.actor_run_id,
|
|
1115
1148
|
event_types=webhook.event_types,
|
|
1116
1149
|
request_url=webhook.request_url,
|
|
1117
1150
|
payload_template=webhook.payload_template,
|
|
@@ -1143,10 +1176,10 @@ class _ActorType:
|
|
|
1143
1176
|
return None
|
|
1144
1177
|
|
|
1145
1178
|
# If is_at_home() is True, config.actor_run_id is always set
|
|
1146
|
-
if not self.
|
|
1179
|
+
if not self.configuration.actor_run_id:
|
|
1147
1180
|
raise RuntimeError('actor_run_id cannot be None when running on the Apify platform.')
|
|
1148
1181
|
|
|
1149
|
-
api_result = await self.
|
|
1182
|
+
api_result = await self.apify_client.run(self.configuration.actor_run_id).update(
|
|
1150
1183
|
status_message=status_message, is_status_message_terminal=is_terminal
|
|
1151
1184
|
)
|
|
1152
1185
|
|
|
@@ -1156,7 +1189,7 @@ class _ActorType:
|
|
|
1156
1189
|
self,
|
|
1157
1190
|
*,
|
|
1158
1191
|
actor_proxy_input: dict
|
|
1159
|
-
| None = None, # this is the raw proxy input from the
|
|
1192
|
+
| None = None, # this is the raw proxy input from the Actor run input, it is not spread or snake_cased in here
|
|
1160
1193
|
password: str | None = None,
|
|
1161
1194
|
groups: list[str] | None = None,
|
|
1162
1195
|
country_code: str | None = None,
|
|
@@ -1201,7 +1234,7 @@ class _ActorType:
|
|
|
1201
1234
|
country_code=country_code,
|
|
1202
1235
|
proxy_urls=proxy_urls,
|
|
1203
1236
|
new_url_function=new_url_function,
|
|
1204
|
-
_actor_config=self.
|
|
1237
|
+
_actor_config=self.configuration,
|
|
1205
1238
|
_apify_client=self._apify_client,
|
|
1206
1239
|
)
|
|
1207
1240
|
|
apify/_charging.py
CHANGED
|
@@ -64,6 +64,16 @@ class ChargingManager(Protocol):
|
|
|
64
64
|
This can be used for instance when your code needs to support multiple pricing models in transition periods.
|
|
65
65
|
"""
|
|
66
66
|
|
|
67
|
+
def get_charged_event_count(self, event_name: str) -> int:
|
|
68
|
+
"""Get the number of events with the given name that were charged so far.
|
|
69
|
+
|
|
70
|
+
Args:
|
|
71
|
+
event_name: Name of the inspected event.
|
|
72
|
+
"""
|
|
73
|
+
|
|
74
|
+
def get_max_total_charge_usd(self) -> Decimal:
|
|
75
|
+
"""Get the configured maximum total charge for this Actor run."""
|
|
76
|
+
|
|
67
77
|
|
|
68
78
|
@docs_group('Charging')
|
|
69
79
|
@dataclass(frozen=True)
|
|
@@ -309,6 +319,15 @@ class ChargingManagerImplementation(ChargingManager):
|
|
|
309
319
|
},
|
|
310
320
|
)
|
|
311
321
|
|
|
322
|
+
@ensure_context
|
|
323
|
+
def get_charged_event_count(self, event_name: str) -> int:
|
|
324
|
+
item = self._charging_state.get(event_name)
|
|
325
|
+
return item.charge_count if item is not None else 0
|
|
326
|
+
|
|
327
|
+
@ensure_context
|
|
328
|
+
def get_max_total_charge_usd(self) -> Decimal:
|
|
329
|
+
return self._max_total_charge_usd
|
|
330
|
+
|
|
312
331
|
|
|
313
332
|
@dataclass
|
|
314
333
|
class ChargingStateItem:
|