apify 3.4.2b3__tar.gz → 3.4.2b4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {apify-3.4.2b3 → apify-3.4.2b4}/CHANGELOG.md +1 -0
- {apify-3.4.2b3 → apify-3.4.2b4}/PKG-INFO +2 -3
- {apify-3.4.2b3 → apify-3.4.2b4}/pyproject.toml +3 -4
- {apify-3.4.2b3 → apify-3.4.2b4}/src/apify/__init__.py +7 -2
- {apify-3.4.2b3 → apify-3.4.2b4}/src/apify/_actor.py +92 -84
- {apify-3.4.2b3 → apify-3.4.2b4}/src/apify/_charging.py +102 -21
- {apify-3.4.2b3 → apify-3.4.2b4}/src/apify/_configuration.py +17 -2
- apify-3.4.2b4/src/apify/_consts.py +90 -0
- {apify-3.4.2b3 → apify-3.4.2b4}/src/apify/_proxy_configuration.py +12 -7
- apify-3.4.2b4/src/apify/_webhook.py +62 -0
- apify-3.4.2b4/src/apify/events/__init__.py +6 -0
- {apify-3.4.2b3 → apify-3.4.2b4}/src/apify/events/_types.py +8 -0
- {apify-3.4.2b3 → apify-3.4.2b4}/src/apify/scrapy/requests.py +2 -1
- {apify-3.4.2b3 → apify-3.4.2b4}/src/apify/storage_clients/_apify/_alias_resolving.py +3 -5
- {apify-3.4.2b3 → apify-3.4.2b4}/src/apify/storage_clients/_apify/_api_client_creation.py +7 -9
- {apify-3.4.2b3 → apify-3.4.2b4}/src/apify/storage_clients/_apify/_dataset_client.py +13 -2
- {apify-3.4.2b3 → apify-3.4.2b4}/src/apify/storage_clients/_apify/_key_value_store_client.py +15 -6
- {apify-3.4.2b3 → apify-3.4.2b4}/src/apify/storage_clients/_apify/_models.py +17 -53
- {apify-3.4.2b3 → apify-3.4.2b4}/src/apify/storage_clients/_apify/_request_queue_client.py +19 -18
- {apify-3.4.2b3 → apify-3.4.2b4}/src/apify/storage_clients/_apify/_request_queue_shared_client.py +23 -23
- {apify-3.4.2b3 → apify-3.4.2b4}/src/apify/storage_clients/_apify/_request_queue_single_client.py +26 -24
- {apify-3.4.2b3 → apify-3.4.2b4}/src/apify/storage_clients/_apify/_utils.py +21 -0
- apify-3.4.2b3/src/apify/_consts.py +0 -13
- apify-3.4.2b3/src/apify/_models.py +0 -266
- apify-3.4.2b3/src/apify/events/__init__.py +0 -5
- {apify-3.4.2b3 → apify-3.4.2b4}/.gitignore +0 -0
- {apify-3.4.2b3 → apify-3.4.2b4}/CONTRIBUTING.md +0 -0
- {apify-3.4.2b3 → apify-3.4.2b4}/LICENSE +0 -0
- {apify-3.4.2b3 → apify-3.4.2b4}/README.md +0 -0
- {apify-3.4.2b3 → apify-3.4.2b4}/src/apify/_crypto.py +0 -0
- {apify-3.4.2b3 → apify-3.4.2b4}/src/apify/_utils.py +0 -0
- {apify-3.4.2b3 → apify-3.4.2b4}/src/apify/events/_apify_event_manager.py +0 -0
- {apify-3.4.2b3 → apify-3.4.2b4}/src/apify/events/py.typed +0 -0
- {apify-3.4.2b3 → apify-3.4.2b4}/src/apify/log.py +0 -0
- {apify-3.4.2b3 → apify-3.4.2b4}/src/apify/py.typed +0 -0
- {apify-3.4.2b3 → apify-3.4.2b4}/src/apify/request_loaders/__init__.py +0 -0
- {apify-3.4.2b3 → apify-3.4.2b4}/src/apify/request_loaders/_apify_request_list.py +0 -0
- {apify-3.4.2b3 → apify-3.4.2b4}/src/apify/request_loaders/py.typed +0 -0
- {apify-3.4.2b3 → apify-3.4.2b4}/src/apify/scrapy/__init__.py +0 -0
- {apify-3.4.2b3 → apify-3.4.2b4}/src/apify/scrapy/_actor_runner.py +0 -0
- {apify-3.4.2b3 → apify-3.4.2b4}/src/apify/scrapy/_async_thread.py +0 -0
- {apify-3.4.2b3 → apify-3.4.2b4}/src/apify/scrapy/_logging_config.py +0 -0
- {apify-3.4.2b3 → apify-3.4.2b4}/src/apify/scrapy/extensions/__init__.py +0 -0
- {apify-3.4.2b3 → apify-3.4.2b4}/src/apify/scrapy/extensions/_httpcache.py +0 -0
- {apify-3.4.2b3 → apify-3.4.2b4}/src/apify/scrapy/middlewares/__init__.py +0 -0
- {apify-3.4.2b3 → apify-3.4.2b4}/src/apify/scrapy/middlewares/apify_proxy.py +0 -0
- {apify-3.4.2b3 → apify-3.4.2b4}/src/apify/scrapy/middlewares/py.typed +0 -0
- {apify-3.4.2b3 → apify-3.4.2b4}/src/apify/scrapy/pipelines/__init__.py +0 -0
- {apify-3.4.2b3 → apify-3.4.2b4}/src/apify/scrapy/pipelines/actor_dataset_push.py +0 -0
- {apify-3.4.2b3 → apify-3.4.2b4}/src/apify/scrapy/pipelines/py.typed +0 -0
- {apify-3.4.2b3 → apify-3.4.2b4}/src/apify/scrapy/py.typed +0 -0
- {apify-3.4.2b3 → apify-3.4.2b4}/src/apify/scrapy/scheduler.py +0 -0
- {apify-3.4.2b3 → apify-3.4.2b4}/src/apify/scrapy/utils.py +0 -0
- {apify-3.4.2b3 → apify-3.4.2b4}/src/apify/storage_clients/__init__.py +0 -0
- {apify-3.4.2b3 → apify-3.4.2b4}/src/apify/storage_clients/_apify/__init__.py +0 -0
- {apify-3.4.2b3 → apify-3.4.2b4}/src/apify/storage_clients/_apify/_storage_client.py +0 -0
- {apify-3.4.2b3 → apify-3.4.2b4}/src/apify/storage_clients/_apify/py.typed +0 -0
- {apify-3.4.2b3 → apify-3.4.2b4}/src/apify/storage_clients/_file_system/__init__.py +0 -0
- {apify-3.4.2b3 → apify-3.4.2b4}/src/apify/storage_clients/_file_system/_dataset_client.py +0 -0
- {apify-3.4.2b3 → apify-3.4.2b4}/src/apify/storage_clients/_file_system/_key_value_store_client.py +0 -0
- {apify-3.4.2b3 → apify-3.4.2b4}/src/apify/storage_clients/_file_system/_storage_client.py +0 -0
- {apify-3.4.2b3 → apify-3.4.2b4}/src/apify/storage_clients/_ppe_dataset_mixin.py +0 -0
- {apify-3.4.2b3 → apify-3.4.2b4}/src/apify/storage_clients/_smart_apify/__init__.py +0 -0
- {apify-3.4.2b3 → apify-3.4.2b4}/src/apify/storage_clients/_smart_apify/_storage_client.py +0 -0
- {apify-3.4.2b3 → apify-3.4.2b4}/src/apify/storage_clients/py.typed +0 -0
- {apify-3.4.2b3 → apify-3.4.2b4}/src/apify/storages/__init__.py +0 -0
- {apify-3.4.2b3 → apify-3.4.2b4}/src/apify/storages/py.typed +0 -0
|
@@ -9,6 +9,7 @@ All notable changes to this project will be documented in this file.
|
|
|
9
9
|
|
|
10
10
|
- [**breaking**] Remove deprecated APIs ([#918](https://github.com/apify/apify-sdk-python/pull/918)) ([3e5728d](https://github.com/apify/apify-sdk-python/commit/3e5728d94cb8fd879d5a76e33a03d55792d835d5)) by [@vdusek](https://github.com/vdusek), closes [#635](https://github.com/apify/apify-sdk-python/issues/635)
|
|
11
11
|
- [**breaking**] Mark secondary arguments as keyword-only ([#917](https://github.com/apify/apify-sdk-python/pull/917)) ([eb94c99](https://github.com/apify/apify-sdk-python/commit/eb94c992ec4aba1cd7cf4dfd7a98731cb304651b)) by [@vdusek](https://github.com/vdusek), closes [#881](https://github.com/apify/apify-sdk-python/issues/881)
|
|
12
|
+
- [**breaking**] Adapt to apify-client v3 ([#719](https://github.com/apify/apify-sdk-python/pull/719)) ([10203bc](https://github.com/apify/apify-sdk-python/commit/10203bc51e67590c97938b37d81614376bc3d29a)) by [@vdusek](https://github.com/vdusek), closes [#697](https://github.com/apify/apify-sdk-python/issues/697), [#736](https://github.com/apify/apify-sdk-python/issues/736), [#770](https://github.com/apify/apify-sdk-python/issues/770), [#853](https://github.com/apify/apify-sdk-python/issues/853)
|
|
12
13
|
|
|
13
14
|
### ⚙️ Miscellaneous Tasks
|
|
14
15
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: apify
|
|
3
|
-
Version: 3.4.2b3
|
|
3
|
+
Version: 3.4.2b4
|
|
4
4
|
Summary: Apify SDK for Python
|
|
5
5
|
Project-URL: Apify Homepage, https://apify.com
|
|
6
6
|
Project-URL: Changelog, https://docs.apify.com/sdk/python/docs/changelog
|
|
@@ -225,8 +225,7 @@ Classifier: Programming Language :: Python :: 3.13
|
|
|
225
225
|
Classifier: Programming Language :: Python :: 3.14
|
|
226
226
|
Classifier: Topic :: Software Development :: Libraries
|
|
227
227
|
Requires-Python: >=3.11
|
|
228
|
-
Requires-Dist: apify-client<
|
|
229
|
-
Requires-Dist: apify-shared<3.0.0,>=2.0.0
|
|
228
|
+
Requires-Dist: apify-client<4.0.0,>=3.0.0
|
|
230
229
|
Requires-Dist: cachetools>=5.5.0
|
|
231
230
|
Requires-Dist: crawlee<2.0.0,>=1.0.4
|
|
232
231
|
Requires-Dist: cryptography>=42.0.0
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "apify"
|
|
7
|
-
version = "3.4.2b3"
|
|
7
|
+
version = "3.4.2b4"
|
|
8
8
|
description = "Apify SDK for Python"
|
|
9
9
|
authors = [{ name = "Apify Technologies s.r.o.", email = "support@apify.com" }]
|
|
10
10
|
license = { file = "LICENSE" }
|
|
@@ -34,8 +34,7 @@ keywords = [
|
|
|
34
34
|
"scraping",
|
|
35
35
|
]
|
|
36
36
|
dependencies = [
|
|
37
|
-
"apify-client>=
|
|
38
|
-
"apify-shared>=2.0.0,<3.0.0",
|
|
37
|
+
"apify-client>=3.0.0,<4.0.0",
|
|
39
38
|
"crawlee>=1.0.4,<2.0.0",
|
|
40
39
|
"cachetools>=5.5.0",
|
|
41
40
|
"cryptography>=42.0.0",
|
|
@@ -197,7 +196,7 @@ builtins-ignorelist = ["id"]
|
|
|
197
196
|
|
|
198
197
|
[tool.ruff.lint.isort]
|
|
199
198
|
known-local-folder = ["apify"]
|
|
200
|
-
known-first-party = ["apify_client", "apify_shared", "crawlee"]
|
|
199
|
+
known-first-party = ["apify_client", "crawlee"]
|
|
201
200
|
|
|
202
201
|
[tool.ruff.lint.pylint]
|
|
203
202
|
max-branches = 18
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from importlib import metadata
|
|
2
2
|
|
|
3
|
-
from
|
|
3
|
+
from apify_client._literals import WebhookEventType
|
|
4
4
|
from crawlee import Request
|
|
5
5
|
from crawlee.events import (
|
|
6
6
|
Event,
|
|
@@ -14,13 +14,18 @@ from crawlee.events import (
|
|
|
14
14
|
|
|
15
15
|
from apify._actor import Actor
|
|
16
16
|
from apify._configuration import Configuration
|
|
17
|
-
from apify.
|
|
17
|
+
from apify._consts import ActorEnvVars, ApifyEnvVars
|
|
18
18
|
from apify._proxy_configuration import ProxyConfiguration, ProxyInfo
|
|
19
|
+
from apify._webhook import Webhook
|
|
20
|
+
from apify.events._types import ActorEventTypes
|
|
19
21
|
|
|
20
22
|
__version__ = metadata.version('apify')
|
|
21
23
|
|
|
22
24
|
__all__ = [
|
|
23
25
|
'Actor',
|
|
26
|
+
'ActorEnvVars',
|
|
27
|
+
'ActorEventTypes',
|
|
28
|
+
'ApifyEnvVars',
|
|
24
29
|
'Configuration',
|
|
25
30
|
'Event',
|
|
26
31
|
'EventAbortingData',
|
|
@@ -2,6 +2,7 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import asyncio
|
|
4
4
|
import sys
|
|
5
|
+
import warnings
|
|
5
6
|
from contextlib import suppress
|
|
6
7
|
from datetime import UTC, datetime, timedelta
|
|
7
8
|
from functools import cached_property
|
|
@@ -12,7 +13,6 @@ from more_itertools import flatten
|
|
|
12
13
|
from pydantic import AliasChoices
|
|
13
14
|
|
|
14
15
|
from apify_client import ApifyClientAsync
|
|
15
|
-
from apify_shared.consts import ActorEnvVars, ActorExitCodes, ApifyEnvVars
|
|
16
16
|
from crawlee import service_locator
|
|
17
17
|
from crawlee.errors import ServiceConflictError
|
|
18
18
|
from crawlee.events import (
|
|
@@ -27,11 +27,11 @@ from crawlee.events import (
|
|
|
27
27
|
|
|
28
28
|
from apify._charging import DEFAULT_DATASET_ITEM_EVENT, ChargeResult, ChargingManager, ChargingManagerImplementation
|
|
29
29
|
from apify._configuration import Configuration
|
|
30
|
-
from apify._consts import EVENT_LISTENERS_TIMEOUT
|
|
30
|
+
from apify._consts import EVENT_LISTENERS_TIMEOUT, EXIT_CODE_ERROR_USER_FUNCTION_THREW, ActorEnvVars, ApifyEnvVars
|
|
31
31
|
from apify._crypto import decrypt_input_secrets, load_private_key
|
|
32
|
-
from apify._models import ActorRun
|
|
33
32
|
from apify._proxy_configuration import ProxyConfiguration
|
|
34
33
|
from apify._utils import docs_group, docs_name, ensure_context, get_system_info, is_running_in_ipython
|
|
34
|
+
from apify._webhook import to_client_representations
|
|
35
35
|
from apify.events import ApifyEventManager, EventManager, LocalEventManager
|
|
36
36
|
from apify.log import _configure_logging, logger
|
|
37
37
|
from apify.storage_clients import ApifyStorageClient, SmartApifyStorageClient
|
|
@@ -45,11 +45,12 @@ if TYPE_CHECKING:
|
|
|
45
45
|
from types import TracebackType
|
|
46
46
|
from typing import Self
|
|
47
47
|
|
|
48
|
-
from
|
|
48
|
+
from apify_client._literals import ActorPermissionLevel
|
|
49
|
+
from apify_client._models import Run
|
|
49
50
|
from crawlee._types import JsonSerializable
|
|
50
51
|
from crawlee.proxy_configuration import _NewUrlFunction
|
|
51
52
|
|
|
52
|
-
from apify.
|
|
53
|
+
from apify._webhook import Webhook
|
|
53
54
|
|
|
54
55
|
MainReturnType = TypeVar('MainReturnType')
|
|
55
56
|
|
|
@@ -234,7 +235,7 @@ class _ActorType:
|
|
|
234
235
|
# In IPython, we don't run `sys.exit()` during Actor exits,
|
|
235
236
|
# so the exception traceback will be printed on its own
|
|
236
237
|
self.log.exception('Actor failed with an exception', exc_info=exc_value)
|
|
237
|
-
self.exit_code =
|
|
238
|
+
self.exit_code = EXIT_CODE_ERROR_USER_FUNCTION_THREW
|
|
238
239
|
|
|
239
240
|
self._is_exiting = True
|
|
240
241
|
self.log.info('Exiting Actor', extra={'exit_code': self.exit_code})
|
|
@@ -504,19 +505,31 @@ class _ActorType:
|
|
|
504
505
|
max_retries: How many times to retry a failed request at most.
|
|
505
506
|
min_delay_between_retries: How long will the client wait between retrying requests
|
|
506
507
|
(increases exponentially from this value).
|
|
507
|
-
timeout:
|
|
508
|
+
timeout: Baseline HTTP timeout for medium-duration API operations. The underlying client uses
|
|
509
|
+
separate timeout tiers for short/medium/long/max-duration calls; passing a value here scales
|
|
510
|
+
all four tiers proportionally (short = `timeout / 6`, long = `timeout * 12`,
|
|
511
|
+
max = `timeout * 12`).
|
|
508
512
|
"""
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
token
|
|
513
|
-
api_url
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
513
|
+
# Forward only the explicitly provided options; omitting the rest lets `ApifyClientAsync` apply its
|
|
514
|
+
# own defaults, so the SDK doesn't have to import and re-pass the client's private default constants.
|
|
515
|
+
client_kwargs: dict[str, Any] = {
|
|
516
|
+
'token': token or self.configuration.token,
|
|
517
|
+
'api_url': api_url or self.configuration.api_base_url,
|
|
518
|
+
}
|
|
519
|
+
if max_retries is not None:
|
|
520
|
+
client_kwargs['max_retries'] = max_retries
|
|
521
|
+
if min_delay_between_retries is not None:
|
|
522
|
+
client_kwargs['min_delay_between_retries'] = min_delay_between_retries
|
|
523
|
+
if timeout is not None:
|
|
524
|
+
# `apify-client` v3 splits the timeout into four tiers; scale them from the single baseline,
|
|
525
|
+
# mirroring the client's default ratios (medium = baseline, short = baseline / 6,
|
|
526
|
+
# long = max = baseline * 12).
|
|
527
|
+
client_kwargs['timeout_short'] = timeout / 6
|
|
528
|
+
client_kwargs['timeout_medium'] = timeout
|
|
529
|
+
client_kwargs['timeout_long'] = timeout * 12
|
|
530
|
+
client_kwargs['timeout_max'] = timeout * 12
|
|
531
|
+
|
|
532
|
+
return ApifyClientAsync(**client_kwargs)
|
|
520
533
|
|
|
521
534
|
@_ensure_context
|
|
522
535
|
async def open_dataset(
|
|
@@ -870,7 +883,7 @@ class _ActorType:
|
|
|
870
883
|
force_permission_level: ActorPermissionLevel | None = None,
|
|
871
884
|
wait_for_finish: int | None = None,
|
|
872
885
|
webhooks: list[Webhook] | None = None,
|
|
873
|
-
) -> ActorRun:
|
|
886
|
+
) -> Run:
|
|
874
887
|
"""Run an Actor on the Apify platform.
|
|
875
888
|
|
|
876
889
|
Unlike `Actor.call`, this method just starts the run without waiting for finish.
|
|
@@ -903,13 +916,6 @@ class _ActorType:
|
|
|
903
916
|
"""
|
|
904
917
|
client = self.new_client(token=token) if token else self.apify_client
|
|
905
918
|
|
|
906
|
-
if webhooks:
|
|
907
|
-
serialized_webhooks = [
|
|
908
|
-
hook.model_dump(by_alias=True, exclude_unset=True, exclude_defaults=True) for hook in webhooks
|
|
909
|
-
]
|
|
910
|
-
else:
|
|
911
|
-
serialized_webhooks = None
|
|
912
|
-
|
|
913
919
|
if timeout == 'inherit':
|
|
914
920
|
actor_start_timeout = self._get_remaining_time()
|
|
915
921
|
elif timeout is None:
|
|
@@ -919,21 +925,20 @@ class _ActorType:
|
|
|
919
925
|
else:
|
|
920
926
|
raise ValueError(f'Invalid timeout {timeout!r}: expected `None`, `"inherit"`, or a `timedelta`.')
|
|
921
927
|
|
|
922
|
-
|
|
928
|
+
actor_client = client.actor(actor_id)
|
|
929
|
+
return await actor_client.start(
|
|
923
930
|
run_input=run_input,
|
|
924
931
|
content_type=content_type,
|
|
925
932
|
build=build,
|
|
926
933
|
max_total_charge_usd=max_total_charge_usd,
|
|
927
934
|
restart_on_error=restart_on_error,
|
|
928
935
|
memory_mbytes=memory_mbytes,
|
|
929
|
-
|
|
936
|
+
run_timeout=actor_start_timeout,
|
|
930
937
|
force_permission_level=force_permission_level,
|
|
931
938
|
wait_for_finish=wait_for_finish,
|
|
932
|
-
webhooks=
|
|
939
|
+
webhooks=to_client_representations(webhooks),
|
|
933
940
|
)
|
|
934
941
|
|
|
935
|
-
return ActorRun.model_validate(api_result)
|
|
936
|
-
|
|
937
942
|
@_ensure_context
|
|
938
943
|
async def abort(
|
|
939
944
|
self,
|
|
@@ -942,7 +947,7 @@ class _ActorType:
|
|
|
942
947
|
token: str | None = None,
|
|
943
948
|
status_message: str | None = None,
|
|
944
949
|
gracefully: bool | None = None,
|
|
945
|
-
) ->
|
|
950
|
+
) -> Run:
|
|
946
951
|
"""Abort given Actor run on the Apify platform using the current user account.
|
|
947
952
|
|
|
948
953
|
The user account is determined by the `APIFY_TOKEN` environment variable.
|
|
@@ -959,13 +964,17 @@ class _ActorType:
|
|
|
959
964
|
Info about the aborted Actor run.
|
|
960
965
|
"""
|
|
961
966
|
client = self.new_client(token=token) if token else self.apify_client
|
|
967
|
+
run_client = client.run(run_id)
|
|
962
968
|
|
|
963
969
|
if status_message:
|
|
964
|
-
await
|
|
970
|
+
await run_client.update(status_message=status_message)
|
|
971
|
+
|
|
972
|
+
run = await run_client.abort(gracefully=gracefully)
|
|
965
973
|
|
|
966
|
-
|
|
974
|
+
if run is None:
|
|
975
|
+
raise RuntimeError(f'Failed to abort Actor run with ID "{run_id}".')
|
|
967
976
|
|
|
968
|
-
return
|
|
977
|
+
return run
|
|
969
978
|
|
|
970
979
|
@_ensure_context
|
|
971
980
|
async def call(
|
|
@@ -984,7 +993,7 @@ class _ActorType:
|
|
|
984
993
|
webhooks: list[Webhook] | None = None,
|
|
985
994
|
wait: timedelta | None = None,
|
|
986
995
|
logger: logging.Logger | None | Literal['default'] = 'default',
|
|
987
|
-
) ->
|
|
996
|
+
) -> Run:
|
|
988
997
|
"""Start an Actor on the Apify Platform and wait for it to finish before returning.
|
|
989
998
|
|
|
990
999
|
It waits indefinitely, unless the wait argument is provided.
|
|
@@ -1020,13 +1029,6 @@ class _ActorType:
|
|
|
1020
1029
|
"""
|
|
1021
1030
|
client = self.new_client(token=token) if token else self.apify_client
|
|
1022
1031
|
|
|
1023
|
-
if webhooks:
|
|
1024
|
-
serialized_webhooks = [
|
|
1025
|
-
hook.model_dump(by_alias=True, exclude_unset=True, exclude_defaults=True) for hook in webhooks
|
|
1026
|
-
]
|
|
1027
|
-
else:
|
|
1028
|
-
serialized_webhooks = None
|
|
1029
|
-
|
|
1030
1032
|
if timeout == 'inherit':
|
|
1031
1033
|
actor_call_timeout = self._get_remaining_time()
|
|
1032
1034
|
elif timeout is None:
|
|
@@ -1036,21 +1038,25 @@ class _ActorType:
|
|
|
1036
1038
|
else:
|
|
1037
1039
|
raise ValueError(f'Invalid timeout {timeout!r}: expected `None`, `"inherit"`, or a `timedelta`.')
|
|
1038
1040
|
|
|
1039
|
-
|
|
1041
|
+
actor_client = client.actor(actor_id)
|
|
1042
|
+
run = await actor_client.call(
|
|
1040
1043
|
run_input=run_input,
|
|
1041
1044
|
content_type=content_type,
|
|
1042
1045
|
build=build,
|
|
1043
1046
|
max_total_charge_usd=max_total_charge_usd,
|
|
1044
1047
|
restart_on_error=restart_on_error,
|
|
1045
1048
|
memory_mbytes=memory_mbytes,
|
|
1046
|
-
|
|
1049
|
+
run_timeout=actor_call_timeout,
|
|
1047
1050
|
force_permission_level=force_permission_level,
|
|
1048
|
-
webhooks=
|
|
1049
|
-
|
|
1051
|
+
webhooks=to_client_representations(webhooks),
|
|
1052
|
+
wait_duration=wait,
|
|
1050
1053
|
logger=logger,
|
|
1051
1054
|
)
|
|
1052
1055
|
|
|
1053
|
-
|
|
1056
|
+
if run is None:
|
|
1057
|
+
raise RuntimeError(f'Failed to call Actor with ID "{actor_id}".')
|
|
1058
|
+
|
|
1059
|
+
return run
|
|
1054
1060
|
|
|
1055
1061
|
@_ensure_context
|
|
1056
1062
|
async def call_task(
|
|
@@ -1065,7 +1071,7 @@ class _ActorType:
|
|
|
1065
1071
|
webhooks: list[Webhook] | None = None,
|
|
1066
1072
|
wait: timedelta | None = None,
|
|
1067
1073
|
token: str | None = None,
|
|
1068
|
-
) ->
|
|
1074
|
+
) -> Run:
|
|
1069
1075
|
"""Start an Actor task on the Apify Platform and wait for it to finish before returning.
|
|
1070
1076
|
|
|
1071
1077
|
It waits indefinitely, unless the wait argument is provided.
|
|
@@ -1098,13 +1104,6 @@ class _ActorType:
|
|
|
1098
1104
|
"""
|
|
1099
1105
|
client = self.new_client(token=token) if token else self.apify_client
|
|
1100
1106
|
|
|
1101
|
-
if webhooks:
|
|
1102
|
-
serialized_webhooks = [
|
|
1103
|
-
hook.model_dump(by_alias=True, exclude_unset=True, exclude_defaults=True) for hook in webhooks
|
|
1104
|
-
]
|
|
1105
|
-
else:
|
|
1106
|
-
serialized_webhooks = None
|
|
1107
|
-
|
|
1108
1107
|
if timeout == 'inherit':
|
|
1109
1108
|
task_call_timeout = self._get_remaining_time()
|
|
1110
1109
|
elif timeout is None:
|
|
@@ -1114,17 +1113,21 @@ class _ActorType:
|
|
|
1114
1113
|
else:
|
|
1115
1114
|
raise ValueError(f'Invalid timeout {timeout!r}: expected `None`, `"inherit"`, or a `timedelta`.')
|
|
1116
1115
|
|
|
1117
|
-
|
|
1116
|
+
task_client = client.task(task_id)
|
|
1117
|
+
run = await task_client.call(
|
|
1118
1118
|
task_input=task_input,
|
|
1119
1119
|
build=build,
|
|
1120
1120
|
restart_on_error=restart_on_error,
|
|
1121
1121
|
memory_mbytes=memory_mbytes,
|
|
1122
|
-
|
|
1123
|
-
webhooks=
|
|
1124
|
-
|
|
1122
|
+
run_timeout=task_call_timeout,
|
|
1123
|
+
webhooks=to_client_representations(webhooks),
|
|
1124
|
+
wait_duration=wait,
|
|
1125
1125
|
)
|
|
1126
1126
|
|
|
1127
|
-
|
|
1127
|
+
if run is None:
|
|
1128
|
+
raise RuntimeError(f'Failed to call Task with ID "{task_id}".')
|
|
1129
|
+
|
|
1130
|
+
return run
|
|
1128
1131
|
|
|
1129
1132
|
@_ensure_context
|
|
1130
1133
|
async def metamorph(
|
|
@@ -1238,14 +1241,7 @@ class _ActorType:
|
|
|
1238
1241
|
await asyncio.sleep(custom_after_sleep.total_seconds())
|
|
1239
1242
|
|
|
1240
1243
|
@_ensure_context
|
|
1241
|
-
async def add_webhook(
|
|
1242
|
-
self,
|
|
1243
|
-
webhook: Webhook,
|
|
1244
|
-
*,
|
|
1245
|
-
ignore_ssl_errors: bool | None = None,
|
|
1246
|
-
do_not_retry: bool | None = None,
|
|
1247
|
-
idempotency_key: str | None = None,
|
|
1248
|
-
) -> None:
|
|
1244
|
+
async def add_webhook(self, webhook: Webhook, *, idempotency_key: str | None = None) -> None:
|
|
1249
1245
|
"""Create an ad-hoc webhook for the current Actor run.
|
|
1250
1246
|
|
|
1251
1247
|
This webhook lets you receive a notification when the Actor run finished or failed.
|
|
@@ -1256,15 +1252,18 @@ class _ActorType:
|
|
|
1256
1252
|
For more information about Apify Actor webhooks, please see the [documentation](https://docs.apify.com/webhooks).
|
|
1257
1253
|
|
|
1258
1254
|
Args:
|
|
1259
|
-
webhook: The webhook to be added
|
|
1260
|
-
|
|
1261
|
-
|
|
1262
|
-
idempotency_key: A unique identifier of a webhook. You can use it to ensure that you won't create
|
|
1263
|
-
the same webhook multiple times.
|
|
1264
|
-
|
|
1265
|
-
Returns:
|
|
1266
|
-
The created webhook.
|
|
1255
|
+
webhook: The webhook to be added. It is automatically bound to the current Actor run.
|
|
1256
|
+
idempotency_key: Deprecated. Pass `idempotency_key` on the `Webhook` instance instead.
|
|
1257
|
+
Will be removed in version 5.0.0.
|
|
1267
1258
|
"""
|
|
1259
|
+
if idempotency_key is not None:
|
|
1260
|
+
warnings.warn(
|
|
1261
|
+
'Passing `idempotency_key` to `Actor.add_webhook()` is deprecated and will be removed in version '
|
|
1262
|
+
'5.0.0. Set it on the `Webhook` instance instead.',
|
|
1263
|
+
DeprecationWarning,
|
|
1264
|
+
stacklevel=2,
|
|
1265
|
+
)
|
|
1266
|
+
|
|
1268
1267
|
if not self.is_at_home():
|
|
1269
1268
|
self.log.error('Actor.add_webhook() is only supported when running on the Apify platform.')
|
|
1270
1269
|
return
|
|
@@ -1278,9 +1277,11 @@ class _ActorType:
|
|
|
1278
1277
|
event_types=webhook.event_types,
|
|
1279
1278
|
request_url=webhook.request_url,
|
|
1280
1279
|
payload_template=webhook.payload_template,
|
|
1281
|
-
|
|
1282
|
-
|
|
1283
|
-
|
|
1280
|
+
headers_template=webhook.headers_template,
|
|
1281
|
+
ignore_ssl_errors=webhook.ignore_ssl_errors,
|
|
1282
|
+
do_not_retry=webhook.do_not_retry,
|
|
1283
|
+
idempotency_key=idempotency_key if idempotency_key is not None else webhook.idempotency_key,
|
|
1284
|
+
is_ad_hoc=True,
|
|
1284
1285
|
)
|
|
1285
1286
|
|
|
1286
1287
|
@_ensure_context
|
|
@@ -1289,7 +1290,7 @@ class _ActorType:
|
|
|
1289
1290
|
status_message: str,
|
|
1290
1291
|
*,
|
|
1291
1292
|
is_terminal: bool | None = None,
|
|
1292
|
-
) ->
|
|
1293
|
+
) -> Run | None:
|
|
1293
1294
|
"""Set the status message for the current Actor run.
|
|
1294
1295
|
|
|
1295
1296
|
Args:
|
|
@@ -1308,11 +1309,18 @@ class _ActorType:
|
|
|
1308
1309
|
if not self.configuration.actor_run_id:
|
|
1309
1310
|
raise RuntimeError('actor_run_id cannot be None when running on the Apify platform.')
|
|
1310
1311
|
|
|
1311
|
-
|
|
1312
|
-
|
|
1312
|
+
run_client = self.apify_client.run(self.configuration.actor_run_id)
|
|
1313
|
+
run = await run_client.update(
|
|
1314
|
+
status_message=status_message,
|
|
1315
|
+
is_status_message_terminal=is_terminal,
|
|
1313
1316
|
)
|
|
1314
1317
|
|
|
1315
|
-
|
|
1318
|
+
if run is None:
|
|
1319
|
+
raise RuntimeError(
|
|
1320
|
+
f'Failed to set status message for Actor run with ID "{self.configuration.actor_run_id}".'
|
|
1321
|
+
)
|
|
1322
|
+
|
|
1323
|
+
return run
|
|
1316
1324
|
|
|
1317
1325
|
@_ensure_context
|
|
1318
1326
|
async def create_proxy_configuration(
|
|
@@ -5,18 +5,18 @@ from contextvars import ContextVar
|
|
|
5
5
|
from dataclasses import dataclass
|
|
6
6
|
from datetime import UTC, datetime
|
|
7
7
|
from decimal import Decimal
|
|
8
|
-
from typing import TYPE_CHECKING, Protocol, TypedDict
|
|
8
|
+
from typing import TYPE_CHECKING, Annotated, Literal, Protocol, TypedDict
|
|
9
9
|
|
|
10
|
-
from pydantic import
|
|
10
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
11
|
+
|
|
12
|
+
import apify_client._models as _client_models
|
|
13
|
+
from apify_client._models import ActorChargeEvent as ClientActorChargeEvent
|
|
14
|
+
from apify_client._models import FlatPricePerMonthActorPricingInfo as ClientFlatPricePerMonth
|
|
15
|
+
from apify_client._models import FreeActorPricingInfo as ClientFree
|
|
16
|
+
from apify_client._models import PayPerEventActorPricingInfo as ClientPayPerEvent
|
|
17
|
+
from apify_client._models import PricePerDatasetItemActorPricingInfo as ClientPricePerDatasetItem
|
|
18
|
+
from apify_client._models import PricingPerEvent as ClientPricingPerEvent
|
|
11
19
|
|
|
12
|
-
from apify._models import (
|
|
13
|
-
ActorRun,
|
|
14
|
-
FlatPricePerMonthActorPricingInfo,
|
|
15
|
-
FreeActorPricingInfo,
|
|
16
|
-
PayPerEventActorPricingInfo,
|
|
17
|
-
PricePerDatasetItemActorPricingInfo,
|
|
18
|
-
PricingModel,
|
|
19
|
-
)
|
|
20
20
|
from apify._utils import ReentrantLock, docs_group, ensure_context
|
|
21
21
|
from apify.log import logger
|
|
22
22
|
from apify.storages import Dataset
|
|
@@ -28,7 +28,8 @@ if TYPE_CHECKING:
|
|
|
28
28
|
|
|
29
29
|
from apify._configuration import Configuration
|
|
30
30
|
|
|
31
|
-
|
|
31
|
+
PricingModel = Literal['PAY_PER_EVENT', 'PRICE_PER_DATASET_ITEM', 'FLAT_PRICE_PER_MONTH', 'FREE']
|
|
32
|
+
"""Pricing model for an Actor."""
|
|
32
33
|
|
|
33
34
|
DEFAULT_DATASET_ITEM_EVENT = 'apify-default-dataset-item'
|
|
34
35
|
|
|
@@ -39,6 +40,80 @@ charging_manager_ctx: ContextVar[ChargingManager | None] = ContextVar('charging_
|
|
|
39
40
|
_ensure_context = ensure_context('active')
|
|
40
41
|
|
|
41
42
|
|
|
43
|
+
# These are thin subclasses of the `apify-client` pricing models. The Apify platform serializes Actor
|
|
44
|
+
# pricing info into the `APIFY_ACTOR_PRICING_INFO` env var (parsed by `Configuration.actor_pricing_info`),
|
|
45
|
+
# but omits several fields that `apify-client` v3 marks as required (`apifyMarginPercentage`, `createdAt`,
|
|
46
|
+
# `startedAt`, per-event `eventDescription`, and per-variant `trialMinutes` / `pricePerUnitUsd` / `unitName`).
|
|
47
|
+
# Each subclass relaxes only those omitted fields to optional, so the env var deserializes without faking
|
|
48
|
+
# values. Because every subclass is-a `apify-client` model, the API-returned `Run.pricing_info` (already an
|
|
49
|
+
# `apify-client` instance) flows through the same code paths without conversion.
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class _RelaxedPricingMetadata(BaseModel):
|
|
53
|
+
"""Mixin relaxing the `CommonActorPricingInfo` metadata fields the platform env var omits."""
|
|
54
|
+
|
|
55
|
+
model_config = ConfigDict(populate_by_name=True, extra='allow')
|
|
56
|
+
|
|
57
|
+
apify_margin_percentage: Annotated[float | None, Field(alias='apifyMarginPercentage')] = None
|
|
58
|
+
created_at: Annotated[datetime | None, Field(alias='createdAt')] = None
|
|
59
|
+
started_at: Annotated[datetime | None, Field(alias='startedAt')] = None
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
@docs_group('Charging')
|
|
63
|
+
class ActorChargeEvent(ClientActorChargeEvent):
|
|
64
|
+
# `event_description` is required in apify-client but omitted from the env var.
|
|
65
|
+
event_description: Annotated[str | None, Field(alias='eventDescription')] = None
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
@docs_group('Charging')
|
|
69
|
+
class PricingPerEvent(ClientPricingPerEvent):
|
|
70
|
+
actor_charge_events: Annotated[dict[str, ActorChargeEvent] | None, Field(alias='actorChargeEvents')] = None
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
@docs_group('Charging')
|
|
74
|
+
class FreeActorPricingInfo(_RelaxedPricingMetadata, ClientFree):
|
|
75
|
+
pass
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
@docs_group('Charging')
|
|
79
|
+
class FlatPricePerMonthActorPricingInfo(_RelaxedPricingMetadata, ClientFlatPricePerMonth):
|
|
80
|
+
trial_minutes: Annotated[int | None, Field(alias='trialMinutes')] = None
|
|
81
|
+
price_per_unit_usd: Annotated[float | None, Field(alias='pricePerUnitUsd')] = None
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
@docs_group('Charging')
|
|
85
|
+
class PricePerDatasetItemActorPricingInfo(_RelaxedPricingMetadata, ClientPricePerDatasetItem):
|
|
86
|
+
unit_name: Annotated[str | None, Field(alias='unitName')] = None
|
|
87
|
+
# `price_per_unit_usd` is already optional in apify-client - inherited.
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
@docs_group('Charging')
|
|
91
|
+
class PayPerEventActorPricingInfo(_RelaxedPricingMetadata, ClientPayPerEvent):
|
|
92
|
+
# Re-typed to the relaxed element so an omitted `eventDescription` validates; the field stays required.
|
|
93
|
+
pricing_per_event: Annotated[PricingPerEvent, Field(alias='pricingPerEvent')]
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
ActorPricingInfoModel = ClientFree | ClientFlatPricePerMonth | ClientPricePerDatasetItem | ClientPayPerEvent
|
|
97
|
+
"""Common supertype of both env-var-parsed SDK subclasses and the API-returned `Run.pricing_info`."""
|
|
98
|
+
|
|
99
|
+
# apify-client ships these models with deferred forward refs (`__pydantic_complete__` is False), so the
|
|
100
|
+
# subclasses must be rebuilt - with the `TieredPricing*` names in scope - before the env-var discriminated
|
|
101
|
+
# union can validate standalone.
|
|
102
|
+
_pricing_rebuild_namespace = vars(_client_models) | {
|
|
103
|
+
'ActorChargeEvent': ActorChargeEvent,
|
|
104
|
+
'PricingPerEvent': PricingPerEvent,
|
|
105
|
+
}
|
|
106
|
+
for _pricing_model in (
|
|
107
|
+
ActorChargeEvent,
|
|
108
|
+
PricingPerEvent,
|
|
109
|
+
FreeActorPricingInfo,
|
|
110
|
+
FlatPricePerMonthActorPricingInfo,
|
|
111
|
+
PricePerDatasetItemActorPricingInfo,
|
|
112
|
+
PayPerEventActorPricingInfo,
|
|
113
|
+
):
|
|
114
|
+
_pricing_model.model_rebuild(_types_namespace=_pricing_rebuild_namespace)
|
|
115
|
+
|
|
116
|
+
|
|
42
117
|
@docs_group('Charging')
|
|
43
118
|
class ChargingManager(Protocol):
|
|
44
119
|
"""Provides fine-grained access to pay-per-event functionality.
|
|
@@ -173,6 +248,7 @@ class ChargingManagerImplementation(ChargingManager):
|
|
|
173
248
|
|
|
174
249
|
self._charging_state: dict[str, ChargingStateItem] = {}
|
|
175
250
|
self._pricing_info: dict[str, PricingInfoItem] = {}
|
|
251
|
+
self._tier_priced_events: set[str] = set()
|
|
176
252
|
|
|
177
253
|
self._not_ppe_warning_printed = False
|
|
178
254
|
self.active = False
|
|
@@ -202,10 +278,16 @@ class ChargingManagerImplementation(ChargingManager):
|
|
|
202
278
|
else:
|
|
203
279
|
self._pricing_model = pricing_info.pricing_model if pricing_info else None
|
|
204
280
|
|
|
205
|
-
# Load per-event pricing information
|
|
206
|
-
|
|
281
|
+
# Load per-event pricing information. Check against the apify-client base so both env-var-parsed
|
|
282
|
+
# SDK subclasses and the API-returned model match.
|
|
283
|
+
if isinstance(pricing_info, ClientPayPerEvent):
|
|
207
284
|
actor_charge_events = pricing_info.pricing_per_event.actor_charge_events or {}
|
|
208
285
|
for event_name, event_pricing in actor_charge_events.items():
|
|
286
|
+
if event_pricing.event_price_usd is None:
|
|
287
|
+
# Tier-priced event - not chargeable via the SDK's flat-price path; tracked so a later
|
|
288
|
+
# charge attempt is reported accurately rather than as an "unknown event".
|
|
289
|
+
self._tier_priced_events.add(event_name)
|
|
290
|
+
continue
|
|
209
291
|
self._pricing_info[event_name] = PricingInfoItem(
|
|
210
292
|
price=Decimal(str(event_pricing.event_price_usd)),
|
|
211
293
|
title=event_pricing.event_title,
|
|
@@ -309,6 +391,10 @@ class ChargingManagerImplementation(ChargingManager):
|
|
|
309
391
|
pass
|
|
310
392
|
elif event_name in self._pricing_info:
|
|
311
393
|
await self._client.run(self._actor_run_id).charge(event_name, count=charged_count)
|
|
394
|
+
elif event_name in self._tier_priced_events:
|
|
395
|
+
logger.warning(
|
|
396
|
+
f"Event '{event_name}' is tier-priced and is not chargeable via the pay-per-event API."
|
|
397
|
+
)
|
|
312
398
|
else:
|
|
313
399
|
logger.warning(f"Attempting to charge for an unknown event '{event_name}'")
|
|
314
400
|
|
|
@@ -427,7 +513,8 @@ class ChargingManagerImplementation(ChargingManager):
|
|
|
427
513
|
if self._actor_run_id is None:
|
|
428
514
|
raise RuntimeError('Actor run ID not found even though the Actor is running on Apify')
|
|
429
515
|
|
|
430
|
-
run =
|
|
516
|
+
run = await self._client.run(self._actor_run_id).get()
|
|
517
|
+
|
|
431
518
|
if run is None:
|
|
432
519
|
raise RuntimeError('Actor run not found')
|
|
433
520
|
|
|
@@ -469,12 +556,6 @@ class PricingInfoItem:
|
|
|
469
556
|
|
|
470
557
|
|
|
471
558
|
class _FetchedPricingInfoDict(TypedDict):
|
|
472
|
-
pricing_info:
|
|
473
|
-
FreeActorPricingInfo
|
|
474
|
-
| FlatPricePerMonthActorPricingInfo
|
|
475
|
-
| PricePerDatasetItemActorPricingInfo
|
|
476
|
-
| PayPerEventActorPricingInfo
|
|
477
|
-
| None
|
|
478
|
-
)
|
|
559
|
+
pricing_info: ActorPricingInfoModel | None
|
|
479
560
|
charged_event_counts: dict[str, int]
|
|
480
561
|
max_total_charge_usd: Decimal
|
|
@@ -15,7 +15,7 @@ from crawlee._utils.models import timedelta_ms
|
|
|
15
15
|
from crawlee._utils.urls import validate_http_url
|
|
16
16
|
from crawlee.configuration import Configuration as CrawleeConfiguration
|
|
17
17
|
|
|
18
|
-
from apify.
|
|
18
|
+
from apify._charging import (
|
|
19
19
|
FlatPricePerMonthActorPricingInfo,
|
|
20
20
|
FreeActorPricingInfo,
|
|
21
21
|
PayPerEventActorPricingInfo,
|
|
@@ -71,6 +71,21 @@ def _load_storage_keys(data: None | str | ActorStorages) -> ActorStorages | None
|
|
|
71
71
|
}
|
|
72
72
|
|
|
73
73
|
|
|
74
|
+
def _parse_actor_pricing_info(data: Any) -> Any:
|
|
75
|
+
"""Parse the raw `APIFY_ACTOR_PRICING_INFO` env var value into a pydantic-friendly form.
|
|
76
|
+
|
|
77
|
+
Deserializes a JSON string when needed. Treats `None`, an empty string, and an empty/
|
|
78
|
+
discriminator-less JSON object (`{}` - the value the platform sets for Actors without a configured
|
|
79
|
+
pricing model) as "no pricing info" so the union validator doesn't fail on a missing discriminator.
|
|
80
|
+
"""
|
|
81
|
+
if data is None or data == '':
|
|
82
|
+
return None
|
|
83
|
+
pricing_info = json.loads(data) if isinstance(data, str) else data
|
|
84
|
+
if isinstance(pricing_info, dict) and not (pricing_info.get('pricingModel') or pricing_info.get('pricing_model')):
|
|
85
|
+
return None
|
|
86
|
+
return pricing_info
|
|
87
|
+
|
|
88
|
+
|
|
74
89
|
@docs_group('Configuration')
|
|
75
90
|
class Configuration(CrawleeConfiguration):
|
|
76
91
|
"""A class for specifying the configuration of an Actor.
|
|
@@ -446,7 +461,7 @@ class Configuration(CrawleeConfiguration):
|
|
|
446
461
|
description='JSON string with prising info of the actor',
|
|
447
462
|
discriminator='pricing_model',
|
|
448
463
|
),
|
|
449
|
-
BeforeValidator(
|
|
464
|
+
BeforeValidator(_parse_actor_pricing_info),
|
|
450
465
|
] = None
|
|
451
466
|
|
|
452
467
|
charged_event_counts: Annotated[
|