apify 3.4.1b2__tar.gz → 3.4.2b2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {apify-3.4.1b2 → apify-3.4.2b2}/CHANGELOG.md +14 -2
- {apify-3.4.1b2 → apify-3.4.2b2}/PKG-INFO +2 -3
- {apify-3.4.1b2 → apify-3.4.2b2}/pyproject.toml +3 -4
- {apify-3.4.1b2 → apify-3.4.2b2}/src/apify/_actor.py +37 -50
- {apify-3.4.1b2 → apify-3.4.2b2}/src/apify/_charging.py +2 -2
- {apify-3.4.1b2 → apify-3.4.2b2}/src/apify/_configuration.py +2 -27
- {apify-3.4.1b2 → apify-3.4.2b2}/src/apify/events/_apify_event_manager.py +2 -2
- {apify-3.4.1b2 → apify-3.4.2b2}/src/apify/request_loaders/_apify_request_list.py +6 -3
- {apify-3.4.1b2 → apify-3.4.2b2}/src/apify/storage_clients/_apify/_dataset_client.py +0 -11
- {apify-3.4.1b2 → apify-3.4.2b2}/src/apify/storage_clients/_apify/_key_value_store_client.py +0 -11
- {apify-3.4.1b2 → apify-3.4.2b2}/src/apify/storage_clients/_apify/_request_queue_shared_client.py +2 -2
- {apify-3.4.1b2 → apify-3.4.2b2}/src/apify/storage_clients/_apify/_request_queue_single_client.py +2 -2
- {apify-3.4.1b2 → apify-3.4.2b2}/src/apify/storage_clients/_file_system/_dataset_client.py +2 -2
- {apify-3.4.1b2 → apify-3.4.2b2}/src/apify/storage_clients/_file_system/_key_value_store_client.py +8 -6
- {apify-3.4.1b2 → apify-3.4.2b2}/.gitignore +0 -0
- {apify-3.4.1b2 → apify-3.4.2b2}/CONTRIBUTING.md +0 -0
- {apify-3.4.1b2 → apify-3.4.2b2}/LICENSE +0 -0
- {apify-3.4.1b2 → apify-3.4.2b2}/README.md +0 -0
- {apify-3.4.1b2 → apify-3.4.2b2}/src/apify/__init__.py +0 -0
- {apify-3.4.1b2 → apify-3.4.2b2}/src/apify/_consts.py +0 -0
- {apify-3.4.1b2 → apify-3.4.2b2}/src/apify/_crypto.py +0 -0
- {apify-3.4.1b2 → apify-3.4.2b2}/src/apify/_models.py +0 -0
- {apify-3.4.1b2 → apify-3.4.2b2}/src/apify/_proxy_configuration.py +0 -0
- {apify-3.4.1b2 → apify-3.4.2b2}/src/apify/_utils.py +0 -0
- {apify-3.4.1b2 → apify-3.4.2b2}/src/apify/events/__init__.py +0 -0
- {apify-3.4.1b2 → apify-3.4.2b2}/src/apify/events/_types.py +0 -0
- {apify-3.4.1b2 → apify-3.4.2b2}/src/apify/events/py.typed +0 -0
- {apify-3.4.1b2 → apify-3.4.2b2}/src/apify/log.py +0 -0
- {apify-3.4.1b2 → apify-3.4.2b2}/src/apify/py.typed +0 -0
- {apify-3.4.1b2 → apify-3.4.2b2}/src/apify/request_loaders/__init__.py +0 -0
- {apify-3.4.1b2 → apify-3.4.2b2}/src/apify/request_loaders/py.typed +0 -0
- {apify-3.4.1b2 → apify-3.4.2b2}/src/apify/scrapy/__init__.py +0 -0
- {apify-3.4.1b2 → apify-3.4.2b2}/src/apify/scrapy/_actor_runner.py +0 -0
- {apify-3.4.1b2 → apify-3.4.2b2}/src/apify/scrapy/_async_thread.py +0 -0
- {apify-3.4.1b2 → apify-3.4.2b2}/src/apify/scrapy/_logging_config.py +0 -0
- {apify-3.4.1b2 → apify-3.4.2b2}/src/apify/scrapy/extensions/__init__.py +0 -0
- {apify-3.4.1b2 → apify-3.4.2b2}/src/apify/scrapy/extensions/_httpcache.py +0 -0
- {apify-3.4.1b2 → apify-3.4.2b2}/src/apify/scrapy/middlewares/__init__.py +0 -0
- {apify-3.4.1b2 → apify-3.4.2b2}/src/apify/scrapy/middlewares/apify_proxy.py +0 -0
- {apify-3.4.1b2 → apify-3.4.2b2}/src/apify/scrapy/middlewares/py.typed +0 -0
- {apify-3.4.1b2 → apify-3.4.2b2}/src/apify/scrapy/pipelines/__init__.py +0 -0
- {apify-3.4.1b2 → apify-3.4.2b2}/src/apify/scrapy/pipelines/actor_dataset_push.py +0 -0
- {apify-3.4.1b2 → apify-3.4.2b2}/src/apify/scrapy/pipelines/py.typed +0 -0
- {apify-3.4.1b2 → apify-3.4.2b2}/src/apify/scrapy/py.typed +0 -0
- {apify-3.4.1b2 → apify-3.4.2b2}/src/apify/scrapy/requests.py +0 -0
- {apify-3.4.1b2 → apify-3.4.2b2}/src/apify/scrapy/scheduler.py +0 -0
- {apify-3.4.1b2 → apify-3.4.2b2}/src/apify/scrapy/utils.py +0 -0
- {apify-3.4.1b2 → apify-3.4.2b2}/src/apify/storage_clients/__init__.py +0 -0
- {apify-3.4.1b2 → apify-3.4.2b2}/src/apify/storage_clients/_apify/__init__.py +0 -0
- {apify-3.4.1b2 → apify-3.4.2b2}/src/apify/storage_clients/_apify/_alias_resolving.py +0 -0
- {apify-3.4.1b2 → apify-3.4.2b2}/src/apify/storage_clients/_apify/_api_client_creation.py +0 -0
- {apify-3.4.1b2 → apify-3.4.2b2}/src/apify/storage_clients/_apify/_models.py +0 -0
- {apify-3.4.1b2 → apify-3.4.2b2}/src/apify/storage_clients/_apify/_request_queue_client.py +0 -0
- {apify-3.4.1b2 → apify-3.4.2b2}/src/apify/storage_clients/_apify/_storage_client.py +0 -0
- {apify-3.4.1b2 → apify-3.4.2b2}/src/apify/storage_clients/_apify/_utils.py +0 -0
- {apify-3.4.1b2 → apify-3.4.2b2}/src/apify/storage_clients/_apify/py.typed +0 -0
- {apify-3.4.1b2 → apify-3.4.2b2}/src/apify/storage_clients/_file_system/__init__.py +0 -0
- {apify-3.4.1b2 → apify-3.4.2b2}/src/apify/storage_clients/_file_system/_storage_client.py +0 -0
- {apify-3.4.1b2 → apify-3.4.2b2}/src/apify/storage_clients/_ppe_dataset_mixin.py +0 -0
- {apify-3.4.1b2 → apify-3.4.2b2}/src/apify/storage_clients/_smart_apify/__init__.py +0 -0
- {apify-3.4.1b2 → apify-3.4.2b2}/src/apify/storage_clients/_smart_apify/_storage_client.py +0 -0
- {apify-3.4.1b2 → apify-3.4.2b2}/src/apify/storage_clients/py.typed +0 -0
- {apify-3.4.1b2 → apify-3.4.2b2}/src/apify/storages/__init__.py +0 -0
- {apify-3.4.1b2 → apify-3.4.2b2}/src/apify/storages/py.typed +0 -0
|
@@ -3,7 +3,20 @@
|
|
|
3
3
|
All notable changes to this project will be documented in this file.
|
|
4
4
|
|
|
5
5
|
<!-- git-cliff-unreleased-start -->
|
|
6
|
-
## 3.4.1 - **not yet released**
|
|
6
|
+
## 3.4.2 - **not yet released**
|
|
7
|
+
|
|
8
|
+
### 🚜 Refactor
|
|
9
|
+
|
|
10
|
+
- [**breaking**] Remove deprecated APIs ([#918](https://github.com/apify/apify-sdk-python/pull/918)) ([3e5728d](https://github.com/apify/apify-sdk-python/commit/3e5728d94cb8fd879d5a76e33a03d55792d835d5)) by [@vdusek](https://github.com/vdusek), closes [#635](https://github.com/apify/apify-sdk-python/issues/635)
|
|
11
|
+
|
|
12
|
+
### ⚙️ Miscellaneous Tasks
|
|
13
|
+
|
|
14
|
+
- [**breaking**] Drop Python 3.10 support ([#908](https://github.com/apify/apify-sdk-python/pull/908)) ([a67788e](https://github.com/apify/apify-sdk-python/commit/a67788e4d7845445a3e80a30af70580d2ab84f32)) by [@vdusek](https://github.com/vdusek), closes [#757](https://github.com/apify/apify-sdk-python/issues/757)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
<!-- git-cliff-unreleased-end -->
|
|
18
|
+
|
|
19
|
+
## [3.4.1](https://github.com/apify/apify-sdk-python/releases/tag/v3.4.1) (2026-05-29)
|
|
7
20
|
|
|
8
21
|
### 🐛 Bug Fixes
|
|
9
22
|
|
|
@@ -11,7 +24,6 @@ All notable changes to this project will be documented in this file.
|
|
|
11
24
|
- Update `push_data` annotation with `JsonSerializable` instead of Any ([#899](https://github.com/apify/apify-sdk-python/pull/899)) ([b4f5485](https://github.com/apify/apify-sdk-python/commit/b4f5485175cfd248893bc8a2b3017fa797d1000e)) by [@Mantisus](https://github.com/Mantisus)
|
|
12
25
|
|
|
13
26
|
|
|
14
|
-
<!-- git-cliff-unreleased-end -->
|
|
15
27
|
## [3.4.0](https://github.com/apify/apify-sdk-python/releases/tag/v3.4.0) (2026-05-05)
|
|
16
28
|
|
|
17
29
|
### 🚀 Features
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: apify
|
|
3
|
-
Version: 3.4.1b2
|
|
3
|
+
Version: 3.4.2b2
|
|
4
4
|
Summary: Apify SDK for Python
|
|
5
5
|
Project-URL: Apify Homepage, https://apify.com
|
|
6
6
|
Project-URL: Changelog, https://docs.apify.com/sdk/python/docs/changelog
|
|
@@ -219,13 +219,12 @@ Classifier: Environment :: Console
|
|
|
219
219
|
Classifier: Intended Audience :: Developers
|
|
220
220
|
Classifier: License :: OSI Approved :: Apache Software License
|
|
221
221
|
Classifier: Operating System :: OS Independent
|
|
222
|
-
Classifier: Programming Language :: Python :: 3.10
|
|
223
222
|
Classifier: Programming Language :: Python :: 3.11
|
|
224
223
|
Classifier: Programming Language :: Python :: 3.12
|
|
225
224
|
Classifier: Programming Language :: Python :: 3.13
|
|
226
225
|
Classifier: Programming Language :: Python :: 3.14
|
|
227
226
|
Classifier: Topic :: Software Development :: Libraries
|
|
228
|
-
Requires-Python: >=3.10
|
|
227
|
+
Requires-Python: >=3.11
|
|
229
228
|
Requires-Dist: apify-client<3.0.0,>=2.3.0
|
|
230
229
|
Requires-Dist: apify-shared<3.0.0,>=2.0.0
|
|
231
230
|
Requires-Dist: cachetools>=5.5.0
|
|
@@ -4,19 +4,18 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "apify"
|
|
7
|
-
version = "3.4.1b2"
|
|
7
|
+
version = "3.4.2b2"
|
|
8
8
|
description = "Apify SDK for Python"
|
|
9
9
|
authors = [{ name = "Apify Technologies s.r.o.", email = "support@apify.com" }]
|
|
10
10
|
license = { file = "LICENSE" }
|
|
11
11
|
readme = "README.md"
|
|
12
|
-
requires-python = ">=3.10"
|
|
12
|
+
requires-python = ">=3.11"
|
|
13
13
|
classifiers = [
|
|
14
14
|
"Development Status :: 5 - Production/Stable",
|
|
15
15
|
"Environment :: Console",
|
|
16
16
|
"Intended Audience :: Developers",
|
|
17
17
|
"License :: OSI Approved :: Apache Software License",
|
|
18
18
|
"Operating System :: OS Independent",
|
|
19
|
-
"Programming Language :: Python :: 3.10",
|
|
20
19
|
"Programming Language :: Python :: 3.11",
|
|
21
20
|
"Programming Language :: Python :: 3.12",
|
|
22
21
|
"Programming Language :: Python :: 3.13",
|
|
@@ -210,7 +209,7 @@ asyncio_mode = "auto"
|
|
|
210
209
|
timeout = 1800
|
|
211
210
|
|
|
212
211
|
[tool.ty.environment]
|
|
213
|
-
python-version = "3.10"
|
|
212
|
+
python-version = "3.11"
|
|
214
213
|
|
|
215
214
|
[tool.ty.src]
|
|
216
215
|
include = ["src", "tests", "scripts", "docs", "website"]
|
|
@@ -2,9 +2,8 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import asyncio
|
|
4
4
|
import sys
|
|
5
|
-
import warnings
|
|
6
5
|
from contextlib import suppress
|
|
7
|
-
from datetime import datetime, timedelta
|
|
6
|
+
from datetime import UTC, datetime, timedelta
|
|
8
7
|
from functools import cached_property
|
|
9
8
|
from typing import TYPE_CHECKING, Any, Literal, TypeVar, cast, overload
|
|
10
9
|
|
|
@@ -44,8 +43,7 @@ if TYPE_CHECKING:
|
|
|
44
43
|
from collections.abc import Callable, MutableMapping
|
|
45
44
|
from decimal import Decimal
|
|
46
45
|
from types import TracebackType
|
|
47
|
-
|
|
48
|
-
from typing_extensions import Self
|
|
46
|
+
from typing import Self
|
|
49
47
|
|
|
50
48
|
from apify_shared.consts import ActorPermissionLevel
|
|
51
49
|
from crawlee._types import JsonSerializable
|
|
@@ -868,7 +866,7 @@ class _ActorType:
|
|
|
868
866
|
max_total_charge_usd: Decimal | None = None,
|
|
869
867
|
restart_on_error: bool | None = None,
|
|
870
868
|
memory_mbytes: int | None = None,
|
|
871
|
-
timeout: timedelta | None | Literal['inherit', 'RemainingTime'] = None,
|
|
869
|
+
timeout: timedelta | None | Literal['inherit'] = None,
|
|
872
870
|
force_permission_level: ActorPermissionLevel | None = None,
|
|
873
871
|
wait_for_finish: int | None = None,
|
|
874
872
|
webhooks: list[Webhook] | None = None,
|
|
@@ -890,8 +888,8 @@ class _ActorType:
|
|
|
890
888
|
memory_mbytes: Memory limit for the run, in megabytes. By default, the run uses a memory limit specified
|
|
891
889
|
in the default run configuration for the Actor.
|
|
892
890
|
timeout: Optional timeout for the run, in seconds. By default, the run uses timeout specified in
|
|
893
|
-
the default run configuration for the Actor. Using `inherit`
|
|
894
|
-
|
|
891
|
+
the default run configuration for the Actor. Using `inherit` will set timeout of the other Actor
|
|
892
|
+
to the time remaining from this Actor timeout.
|
|
895
893
|
force_permission_level: Override the Actor's permissions for this run. If not set, the Actor will run
|
|
896
894
|
with permissions configured in the Actor settings.
|
|
897
895
|
wait_for_finish: The maximum number of seconds the server waits for the run to finish. By default,
|
|
@@ -912,22 +910,14 @@ class _ActorType:
|
|
|
912
910
|
else:
|
|
913
911
|
serialized_webhooks = None
|
|
914
912
|
|
|
915
|
-
if timeout
|
|
916
|
-
if timeout == 'RemainingTime':
|
|
917
|
-
warnings.warn(
|
|
918
|
-
'`RemainingTime` is deprecated and will be removed in version 4.0.0. Use `inherit` instead.',
|
|
919
|
-
DeprecationWarning,
|
|
920
|
-
stacklevel=2,
|
|
921
|
-
)
|
|
913
|
+
if timeout == 'inherit':
|
|
922
914
|
actor_start_timeout = self._get_remaining_time()
|
|
923
915
|
elif timeout is None:
|
|
924
916
|
actor_start_timeout = None
|
|
925
917
|
elif isinstance(timeout, timedelta):
|
|
926
918
|
actor_start_timeout = timeout
|
|
927
919
|
else:
|
|
928
|
-
raise ValueError(
|
|
929
|
-
f'Invalid timeout {timeout!r}: expected `None`, `"inherit"`, `"RemainingTime"`, or a `timedelta`.'
|
|
930
|
-
)
|
|
920
|
+
raise ValueError(f'Invalid timeout {timeout!r}: expected `None`, `"inherit"`, or a `timedelta`.')
|
|
931
921
|
|
|
932
922
|
api_result = await client.actor(actor_id).start(
|
|
933
923
|
run_input=run_input,
|
|
@@ -989,7 +979,7 @@ class _ActorType:
|
|
|
989
979
|
max_total_charge_usd: Decimal | None = None,
|
|
990
980
|
restart_on_error: bool | None = None,
|
|
991
981
|
memory_mbytes: int | None = None,
|
|
992
|
-
timeout: timedelta | None | Literal['inherit', 'RemainingTime'] = None,
|
|
982
|
+
timeout: timedelta | None | Literal['inherit'] = None,
|
|
993
983
|
force_permission_level: ActorPermissionLevel | None = None,
|
|
994
984
|
webhooks: list[Webhook] | None = None,
|
|
995
985
|
wait: timedelta | None = None,
|
|
@@ -1012,8 +1002,8 @@ class _ActorType:
|
|
|
1012
1002
|
memory_mbytes: Memory limit for the run, in megabytes. By default, the run uses a memory limit specified
|
|
1013
1003
|
in the default run configuration for the Actor.
|
|
1014
1004
|
timeout: Optional timeout for the run, in seconds. By default, the run uses timeout specified in
|
|
1015
|
-
the default run configuration for the Actor. Using `inherit`
|
|
1016
|
-
|
|
1005
|
+
the default run configuration for the Actor. Using `inherit` will set timeout of the other Actor
|
|
1006
|
+
to the time remaining from this Actor timeout.
|
|
1017
1007
|
force_permission_level: Override the Actor's permissions for this run. If not set, the Actor will run
|
|
1018
1008
|
with permissions configured in the Actor settings.
|
|
1019
1009
|
webhooks: Optional webhooks (https://docs.apify.com/webhooks) associated with the Actor run, which can
|
|
@@ -1037,23 +1027,14 @@ class _ActorType:
|
|
|
1037
1027
|
else:
|
|
1038
1028
|
serialized_webhooks = None
|
|
1039
1029
|
|
|
1040
|
-
if timeout
|
|
1041
|
-
if timeout == 'RemainingTime':
|
|
1042
|
-
warnings.warn(
|
|
1043
|
-
'`RemainingTime` is deprecated and will be removed in version 4.0.0. Use `inherit` instead.',
|
|
1044
|
-
DeprecationWarning,
|
|
1045
|
-
stacklevel=2,
|
|
1046
|
-
)
|
|
1047
|
-
|
|
1030
|
+
if timeout == 'inherit':
|
|
1048
1031
|
actor_call_timeout = self._get_remaining_time()
|
|
1049
1032
|
elif timeout is None:
|
|
1050
1033
|
actor_call_timeout = None
|
|
1051
1034
|
elif isinstance(timeout, timedelta):
|
|
1052
1035
|
actor_call_timeout = timeout
|
|
1053
1036
|
else:
|
|
1054
|
-
raise ValueError(
|
|
1055
|
-
f'Invalid timeout {timeout!r}: expected `None`, `"inherit"`, `"RemainingTime"`, or a `timedelta`.'
|
|
1056
|
-
)
|
|
1037
|
+
raise ValueError(f'Invalid timeout {timeout!r}: expected `None`, `"inherit"`, or a `timedelta`.')
|
|
1057
1038
|
|
|
1058
1039
|
api_result = await client.actor(actor_id).call(
|
|
1059
1040
|
run_input=run_input,
|
|
@@ -1232,22 +1213,21 @@ class _ActorType:
|
|
|
1232
1213
|
(self.event_manager._listeners_to_wrappers[Event.MIGRATING] or {}).values() # noqa: SLF001
|
|
1233
1214
|
)
|
|
1234
1215
|
|
|
1216
|
+
async def safe_dispatch(listener: Any, data: Any) -> None:
|
|
1217
|
+
try:
|
|
1218
|
+
await listener(data)
|
|
1219
|
+
except Exception:
|
|
1220
|
+
self.log.exception('A pre-reboot event listener failed')
|
|
1221
|
+
|
|
1222
|
+
timeout = event_listeners_timeout.total_seconds() if event_listeners_timeout else None
|
|
1235
1223
|
try:
|
|
1236
|
-
|
|
1237
|
-
|
|
1238
|
-
|
|
1239
|
-
|
|
1240
|
-
|
|
1241
|
-
|
|
1242
|
-
timeout=event_listeners_timeout.total_seconds() if event_listeners_timeout else None,
|
|
1243
|
-
)
|
|
1244
|
-
except asyncio.TimeoutError:
|
|
1224
|
+
async with asyncio.timeout(timeout), asyncio.TaskGroup() as tg:
|
|
1225
|
+
for listener in persist_state_listeners:
|
|
1226
|
+
tg.create_task(safe_dispatch(listener, EventPersistStateData(is_migrating=True)))
|
|
1227
|
+
for listener in migrating_listeners:
|
|
1228
|
+
tg.create_task(safe_dispatch(listener, EventMigratingData()))
|
|
1229
|
+
except TimeoutError:
|
|
1245
1230
|
self.log.warning('Pre-reboot event listeners did not finish within timeout; proceeding with reboot')
|
|
1246
|
-
results = []
|
|
1247
|
-
|
|
1248
|
-
for result in results:
|
|
1249
|
-
if isinstance(result, Exception):
|
|
1250
|
-
self.log.exception('A pre-reboot event listener failed', exc_info=result)
|
|
1251
1231
|
|
|
1252
1232
|
if not self.configuration.actor_run_id:
|
|
1253
1233
|
raise RuntimeError('actor_run_id cannot be None when running on the Apify platform.')
|
|
@@ -1420,9 +1400,16 @@ class _ActorType:
|
|
|
1420
1400
|
return await kvs.get_auto_saved_value(key or self._ACTOR_STATE_KEY, default_value)
|
|
1421
1401
|
|
|
1422
1402
|
async def _save_actor_state(self) -> None:
|
|
1423
|
-
|
|
1424
|
-
|
|
1425
|
-
|
|
1403
|
+
async def safe_persist(kvs_name: str | None) -> None:
|
|
1404
|
+
try:
|
|
1405
|
+
store = await self.open_key_value_store(name=kvs_name)
|
|
1406
|
+
await store.persist_autosaved_values()
|
|
1407
|
+
except Exception:
|
|
1408
|
+
self.log.exception('Failed to persist auto-saved values', extra={'kvs_name': kvs_name})
|
|
1409
|
+
|
|
1410
|
+
async with asyncio.TaskGroup() as tg:
|
|
1411
|
+
for kvs_name in self._use_state_stores:
|
|
1412
|
+
tg.create_task(safe_persist(kvs_name))
|
|
1426
1413
|
|
|
1427
1414
|
def _get_default_exit_process(self) -> bool:
|
|
1428
1415
|
"""Return False for IPython and Scrapy environments, True otherwise."""
|
|
@@ -1442,10 +1429,10 @@ class _ActorType:
|
|
|
1442
1429
|
def _get_remaining_time(self) -> timedelta | None:
|
|
1443
1430
|
"""Get time remaining from the Actor timeout. Returns `None` if not on an Apify platform."""
|
|
1444
1431
|
if self.is_at_home() and self.configuration.timeout_at:
|
|
1445
|
-
return max(self.configuration.timeout_at - datetime.now(tz=
|
|
1432
|
+
return max(self.configuration.timeout_at - datetime.now(tz=UTC), timedelta(0))
|
|
1446
1433
|
|
|
1447
1434
|
self.log.warning(
|
|
1448
|
-
'Using `inherit`
|
|
1435
|
+
'Using `inherit` argument is only possible when the Actor'
|
|
1449
1436
|
' is running on the Apify platform and when the timeout for the Actor run is set. '
|
|
1450
1437
|
f'{self.is_at_home()=}, {self.configuration.timeout_at=}'
|
|
1451
1438
|
)
|
|
@@ -3,7 +3,7 @@ from __future__ import annotations
|
|
|
3
3
|
import math
|
|
4
4
|
from contextvars import ContextVar
|
|
5
5
|
from dataclasses import dataclass
|
|
6
|
-
from datetime import
|
|
6
|
+
from datetime import UTC, datetime
|
|
7
7
|
from decimal import Decimal
|
|
8
8
|
from typing import TYPE_CHECKING, Protocol, TypedDict
|
|
9
9
|
|
|
@@ -320,7 +320,7 @@ class ChargingManagerImplementation(ChargingManager):
|
|
|
320
320
|
'event_title': pricing_info.title,
|
|
321
321
|
'event_price_usd': float(round(pricing_info.price, 3)),
|
|
322
322
|
'charged_count': charged_count,
|
|
323
|
-
'timestamp': datetime.now(
|
|
323
|
+
'timestamp': datetime.now(UTC).isoformat(),
|
|
324
324
|
}
|
|
325
325
|
)
|
|
326
326
|
|
|
@@ -5,10 +5,10 @@ from datetime import datetime, timedelta
|
|
|
5
5
|
from decimal import Decimal
|
|
6
6
|
from logging import getLogger
|
|
7
7
|
from pathlib import Path
|
|
8
|
-
from typing import Annotated, Any
|
|
8
|
+
from typing import Annotated, Any, Self
|
|
9
9
|
|
|
10
10
|
from pydantic import AliasChoices, BeforeValidator, Field, model_validator
|
|
11
|
-
from typing_extensions import
|
|
11
|
+
from typing_extensions import TypedDict
|
|
12
12
|
|
|
13
13
|
from crawlee import service_locator
|
|
14
14
|
from crawlee._utils.models import timedelta_ms
|
|
@@ -273,22 +273,6 @@ class Configuration(CrawleeConfiguration):
|
|
|
273
273
|
),
|
|
274
274
|
] = False
|
|
275
275
|
|
|
276
|
-
latest_sdk_version: Annotated[
|
|
277
|
-
str | None,
|
|
278
|
-
Field(
|
|
279
|
-
alias='apify_sdk_latest_version',
|
|
280
|
-
description='Specifies the most recent release version of the Apify SDK for Javascript. Used for '
|
|
281
|
-
'checking for updates.',
|
|
282
|
-
),
|
|
283
|
-
deprecated('SDK version checking is not supported for the Python SDK'),
|
|
284
|
-
] = None
|
|
285
|
-
|
|
286
|
-
log_format: Annotated[
|
|
287
|
-
str | None,
|
|
288
|
-
Field(alias='apify_log_format'),
|
|
289
|
-
deprecated('Adjust the log format in code instead'),
|
|
290
|
-
] = None
|
|
291
|
-
|
|
292
276
|
max_paid_dataset_items: Annotated[
|
|
293
277
|
int | None,
|
|
294
278
|
Field(
|
|
@@ -386,15 +370,6 @@ class Configuration(CrawleeConfiguration):
|
|
|
386
370
|
BeforeValidator(lambda val: val if val != '' else None), # We should accept empty environment variables as well
|
|
387
371
|
] = None
|
|
388
372
|
|
|
389
|
-
standby_port: Annotated[
|
|
390
|
-
int,
|
|
391
|
-
Field(
|
|
392
|
-
alias='actor_standby_port',
|
|
393
|
-
description='TCP port for the Actor to start an HTTP server to receive messages in the Actor Standby mode',
|
|
394
|
-
),
|
|
395
|
-
deprecated('Use `web_server_port` instead'),
|
|
396
|
-
] = 4321
|
|
397
|
-
|
|
398
373
|
standby_url: Annotated[
|
|
399
374
|
str,
|
|
400
375
|
BeforeValidator(validate_http_url),
|
|
@@ -2,11 +2,11 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import asyncio
|
|
4
4
|
import contextlib
|
|
5
|
-
from typing import TYPE_CHECKING, Annotated
|
|
5
|
+
from typing import TYPE_CHECKING, Annotated, Self
|
|
6
6
|
|
|
7
7
|
import websockets.asyncio.client
|
|
8
8
|
from pydantic import Discriminator, TypeAdapter
|
|
9
|
-
from typing_extensions import
|
|
9
|
+
from typing_extensions import Unpack, override
|
|
10
10
|
|
|
11
11
|
from crawlee.events import EventManager
|
|
12
12
|
from crawlee.events._types import Event, EventPersistStateData
|
|
@@ -117,9 +117,12 @@ class ApifyRequestList(RequestList):
|
|
|
117
117
|
are extracted from the response body and turned into `Request` objects, inheriting `method`, `payload`,
|
|
118
118
|
`headers`, and `user_data` from the source entry.
|
|
119
119
|
"""
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
120
|
+
async with asyncio.TaskGroup() as tg:
|
|
121
|
+
tasks = [
|
|
122
|
+
tg.create_task(cls._process_remote_url(request_input, http_client))
|
|
123
|
+
for request_input in remote_url_requests_inputs
|
|
124
|
+
]
|
|
125
|
+
return list(chain.from_iterable(task.result() for task in tasks))
|
|
123
126
|
|
|
124
127
|
@staticmethod
|
|
125
128
|
def _create_requests_from_input(simple_url_inputs: list[_SimpleUrlInput]) -> list[Request]:
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import asyncio
|
|
4
|
-
import warnings
|
|
5
4
|
from logging import getLogger
|
|
6
5
|
from typing import TYPE_CHECKING
|
|
7
6
|
|
|
@@ -42,7 +41,6 @@ class ApifyDatasetClient(DatasetClient, DatasetClientPpeMixin):
|
|
|
42
41
|
self,
|
|
43
42
|
*,
|
|
44
43
|
api_client: DatasetClientAsync,
|
|
45
|
-
api_public_base_url: str,
|
|
46
44
|
lock: asyncio.Lock,
|
|
47
45
|
) -> None:
|
|
48
46
|
"""Initialize a new instance.
|
|
@@ -58,14 +56,6 @@ class ApifyDatasetClient(DatasetClient, DatasetClientPpeMixin):
|
|
|
58
56
|
self._lock = lock
|
|
59
57
|
"""A lock to ensure that only one operation is performed at a time."""
|
|
60
58
|
|
|
61
|
-
if api_public_base_url:
|
|
62
|
-
# Remove in version 4.0, https://github.com/apify/apify-sdk-python/issues/635
|
|
63
|
-
warnings.warn(
|
|
64
|
-
'api_public_base_url argument is deprecated and will be removed in version 4.0.0',
|
|
65
|
-
DeprecationWarning,
|
|
66
|
-
stacklevel=2,
|
|
67
|
-
)
|
|
68
|
-
|
|
69
59
|
@override
|
|
70
60
|
async def get_metadata(self) -> DatasetMetadata:
|
|
71
61
|
metadata = await self._api_client.get()
|
|
@@ -114,7 +104,6 @@ class ApifyDatasetClient(DatasetClient, DatasetClientPpeMixin):
|
|
|
114
104
|
|
|
115
105
|
dataset_client = cls(
|
|
116
106
|
api_client=api_client,
|
|
117
|
-
api_public_base_url='', # Remove in version 4.0, https://github.com/apify/apify-sdk-python/issues/635
|
|
118
107
|
lock=asyncio.Lock(),
|
|
119
108
|
)
|
|
120
109
|
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import asyncio
|
|
4
|
-
import warnings
|
|
5
4
|
from logging import getLogger
|
|
6
5
|
from typing import TYPE_CHECKING, Any
|
|
7
6
|
|
|
@@ -30,7 +29,6 @@ class ApifyKeyValueStoreClient(KeyValueStoreClient):
|
|
|
30
29
|
self,
|
|
31
30
|
*,
|
|
32
31
|
api_client: KeyValueStoreClientAsync,
|
|
33
|
-
api_public_base_url: str,
|
|
34
32
|
lock: asyncio.Lock,
|
|
35
33
|
) -> None:
|
|
36
34
|
"""Initialize a new instance.
|
|
@@ -43,14 +41,6 @@ class ApifyKeyValueStoreClient(KeyValueStoreClient):
|
|
|
43
41
|
self._lock = lock
|
|
44
42
|
"""A lock to ensure that only one operation is performed at a time."""
|
|
45
43
|
|
|
46
|
-
if api_public_base_url:
|
|
47
|
-
# Remove in version 4.0, https://github.com/apify/apify-sdk-python/issues/635
|
|
48
|
-
warnings.warn(
|
|
49
|
-
'api_public_base_url argument is deprecated and will be removed in version 4.0.0',
|
|
50
|
-
DeprecationWarning,
|
|
51
|
-
stacklevel=2,
|
|
52
|
-
)
|
|
53
|
-
|
|
54
44
|
@override
|
|
55
45
|
async def get_metadata(self) -> ApifyKeyValueStoreMetadata:
|
|
56
46
|
metadata = await self._api_client.get()
|
|
@@ -98,7 +88,6 @@ class ApifyKeyValueStoreClient(KeyValueStoreClient):
|
|
|
98
88
|
)
|
|
99
89
|
return cls(
|
|
100
90
|
api_client=api_client,
|
|
101
|
-
api_public_base_url='', # Remove in version 4.0, https://github.com/apify/apify-sdk-python/issues/635
|
|
102
91
|
lock=asyncio.Lock(),
|
|
103
92
|
)
|
|
104
93
|
|
{apify-3.4.1b2 → apify-3.4.2b2}/src/apify/storage_clients/_apify/_request_queue_shared_client.py
RENAMED
|
@@ -2,7 +2,7 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import asyncio
|
|
4
4
|
from collections import deque
|
|
5
|
-
from datetime import datetime, timedelta
|
|
5
|
+
from datetime import UTC, datetime, timedelta
|
|
6
6
|
from logging import getLogger
|
|
7
7
|
from typing import TYPE_CHECKING, Any, Final
|
|
8
8
|
|
|
@@ -214,7 +214,7 @@ class ApifyRequestQueueSharedClient:
|
|
|
214
214
|
request_id = unique_key_to_request_id(request.unique_key)
|
|
215
215
|
# Set the handled_at timestamp if not already set
|
|
216
216
|
if request.handled_at is None:
|
|
217
|
-
request.handled_at = datetime.now(tz=
|
|
217
|
+
request.handled_at = datetime.now(tz=UTC)
|
|
218
218
|
|
|
219
219
|
if cached_request := self._requests_cache.get(request_id):
|
|
220
220
|
cached_request.was_already_handled = request.was_already_handled
|
{apify-3.4.1b2 → apify-3.4.2b2}/src/apify/storage_clients/_apify/_request_queue_single_client.py
RENAMED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
from collections import deque
|
|
4
|
-
from datetime import
|
|
4
|
+
from datetime import UTC, datetime
|
|
5
5
|
from logging import getLogger
|
|
6
6
|
from typing import TYPE_CHECKING, Final
|
|
7
7
|
|
|
@@ -212,7 +212,7 @@ class ApifyRequestQueueSingleClient:
|
|
|
212
212
|
cached_request.handled_at = request.handled_at
|
|
213
213
|
|
|
214
214
|
if request.handled_at is None:
|
|
215
|
-
request.handled_at = datetime.now(tz=
|
|
215
|
+
request.handled_at = datetime.now(tz=UTC)
|
|
216
216
|
self.metadata.handled_request_count += 1
|
|
217
217
|
self.metadata.pending_request_count -= 1
|
|
218
218
|
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from typing import TYPE_CHECKING, Any
|
|
3
|
+
from typing import TYPE_CHECKING, Any, Self
|
|
4
4
|
|
|
5
|
-
from typing_extensions import
|
|
5
|
+
from typing_extensions import override
|
|
6
6
|
|
|
7
7
|
from crawlee.storage_clients._file_system import FileSystemDatasetClient
|
|
8
8
|
|
{apify-3.4.1b2 → apify-3.4.2b2}/src/apify/storage_clients/_file_system/_key_value_store_client.py
RENAMED
|
@@ -3,8 +3,9 @@ import json
|
|
|
3
3
|
import logging
|
|
4
4
|
from itertools import chain
|
|
5
5
|
from pathlib import Path
|
|
6
|
+
from typing import Self
|
|
6
7
|
|
|
7
|
-
from typing_extensions import
|
|
8
|
+
from typing_extensions import override
|
|
8
9
|
|
|
9
10
|
from crawlee._consts import METADATA_FILENAME
|
|
10
11
|
from crawlee._utils.file import atomic_write, infer_mime_type, json_dumps
|
|
@@ -74,11 +75,12 @@ class ApifyFileSystemKeyValueStoreClient(FileSystemKeyValueStoreClient):
|
|
|
74
75
|
files_to_keep = {self._input_key_filename, f'{self._input_key_filename}.{METADATA_FILENAME}'}
|
|
75
76
|
files_to_keep.add(METADATA_FILENAME)
|
|
76
77
|
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
78
|
+
async with asyncio.TaskGroup() as tg:
|
|
79
|
+
for file_path in self.path_to_kvs.glob('*'):
|
|
80
|
+
if file_path.name in files_to_keep:
|
|
81
|
+
continue
|
|
82
|
+
if file_path.is_file():
|
|
83
|
+
tg.create_task(asyncio.to_thread(file_path.unlink, missing_ok=True))
|
|
82
84
|
|
|
83
85
|
await self._update_metadata(
|
|
84
86
|
update_accessed_at=True,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|