apify 2.1.0b9__py3-none-any.whl → 2.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of apify might be problematic. Click here for more details.
- apify/_actor.py +74 -26
- apify/_configuration.py +45 -1
- apify/_proxy_configuration.py +3 -3
- apify/apify_storage_client/_apify_storage_client.py +12 -2
- apify/apify_storage_client/_dataset_client.py +2 -1
- apify/apify_storage_client/_dataset_collection_client.py +2 -1
- apify/apify_storage_client/_key_value_store_client.py +2 -6
- apify/apify_storage_client/_key_value_store_collection_client.py +2 -1
- apify/apify_storage_client/_request_queue_client.py +2 -22
- apify/apify_storage_client/_request_queue_collection_client.py +2 -1
- apify/log.py +4 -8
- apify/storages/_request_list.py +1 -1
- {apify-2.1.0b9.dist-info → apify-2.2.0.dist-info}/METADATA +6 -5
- {apify-2.1.0b9.dist-info → apify-2.2.0.dist-info}/RECORD +16 -16
- {apify-2.1.0b9.dist-info → apify-2.2.0.dist-info}/WHEEL +1 -1
- {apify-2.1.0b9.dist-info → apify-2.2.0.dist-info}/LICENSE +0 -0
apify/_actor.py
CHANGED
|
@@ -7,13 +7,14 @@ from datetime import timedelta
|
|
|
7
7
|
from typing import TYPE_CHECKING, Any, Callable, TypeVar, cast
|
|
8
8
|
|
|
9
9
|
from lazy_object_proxy import Proxy
|
|
10
|
+
from more_itertools import flatten
|
|
10
11
|
from pydantic import AliasChoices
|
|
11
12
|
|
|
12
13
|
from apify_client import ApifyClientAsync
|
|
13
14
|
from apify_shared.consts import ActorEnvVars, ActorExitCodes, ApifyEnvVars
|
|
14
15
|
from apify_shared.utils import ignore_docs, maybe_extract_enum_member_value
|
|
15
|
-
from crawlee import
|
|
16
|
-
from crawlee.events._types import Event, EventPersistStateData
|
|
16
|
+
from crawlee import service_locator
|
|
17
|
+
from crawlee.events._types import Event, EventMigratingData, EventPersistStateData
|
|
17
18
|
|
|
18
19
|
from apify._configuration import Configuration
|
|
19
20
|
from apify._consts import EVENT_LISTENERS_TIMEOUT
|
|
@@ -33,6 +34,7 @@ if TYPE_CHECKING:
|
|
|
33
34
|
from typing_extensions import Self
|
|
34
35
|
|
|
35
36
|
from crawlee.proxy_configuration import _NewUrlFunction
|
|
37
|
+
from crawlee.storage_clients import BaseStorageClient
|
|
36
38
|
|
|
37
39
|
from apify._models import Webhook
|
|
38
40
|
|
|
@@ -48,6 +50,7 @@ class _ActorType:
|
|
|
48
50
|
_apify_client: ApifyClientAsync
|
|
49
51
|
_configuration: Configuration
|
|
50
52
|
_is_exiting = False
|
|
53
|
+
_is_rebooting = False
|
|
51
54
|
|
|
52
55
|
def __init__(
|
|
53
56
|
self,
|
|
@@ -69,17 +72,22 @@ class _ActorType:
|
|
|
69
72
|
self._configure_logging = configure_logging
|
|
70
73
|
self._apify_client = self.new_client()
|
|
71
74
|
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
+
# Create an instance of the cloud storage client, the local storage client is obtained
|
|
76
|
+
# from the service locator.
|
|
77
|
+
self._cloud_storage_client = ApifyStorageClient.from_config(config=self._configuration)
|
|
78
|
+
|
|
79
|
+
# Set the event manager based on whether the Actor is running on the platform or locally.
|
|
80
|
+
self._event_manager = (
|
|
81
|
+
PlatformEventManager(
|
|
75
82
|
config=self._configuration,
|
|
76
83
|
persist_state_interval=self._configuration.persist_state_interval,
|
|
77
84
|
)
|
|
78
|
-
|
|
79
|
-
|
|
85
|
+
if self.is_at_home()
|
|
86
|
+
else LocalEventManager(
|
|
80
87
|
system_info_interval=self._configuration.system_info_interval,
|
|
81
88
|
persist_state_interval=self._configuration.persist_state_interval,
|
|
82
89
|
)
|
|
90
|
+
)
|
|
83
91
|
|
|
84
92
|
self._is_initialized = False
|
|
85
93
|
|
|
@@ -92,9 +100,6 @@ class _ActorType:
|
|
|
92
100
|
When you exit the `async with` block, the `Actor.exit()` method is called, and if any exception happens while
|
|
93
101
|
executing the block code, the `Actor.fail` method is called.
|
|
94
102
|
"""
|
|
95
|
-
if self._configure_logging:
|
|
96
|
-
_configure_logging(self._configuration)
|
|
97
|
-
|
|
98
103
|
await self.init()
|
|
99
104
|
return self
|
|
100
105
|
|
|
@@ -154,10 +159,25 @@ class _ActorType:
|
|
|
154
159
|
"""The logging.Logger instance the Actor uses."""
|
|
155
160
|
return logger
|
|
156
161
|
|
|
162
|
+
@property
|
|
163
|
+
def _local_storage_client(self) -> BaseStorageClient:
|
|
164
|
+
"""The local storage client the Actor instance uses."""
|
|
165
|
+
return service_locator.get_storage_client()
|
|
166
|
+
|
|
157
167
|
def _raise_if_not_initialized(self) -> None:
|
|
158
168
|
if not self._is_initialized:
|
|
159
169
|
raise RuntimeError('The Actor was not initialized!')
|
|
160
170
|
|
|
171
|
+
def _raise_if_cloud_requested_but_not_configured(self, *, force_cloud: bool) -> None:
|
|
172
|
+
if not force_cloud:
|
|
173
|
+
return
|
|
174
|
+
|
|
175
|
+
if not self.is_at_home() and self.config.token is None:
|
|
176
|
+
raise RuntimeError(
|
|
177
|
+
'In order to use the Apify cloud storage from your computer, '
|
|
178
|
+
'you need to provide an Apify token using the APIFY_TOKEN environment variable.'
|
|
179
|
+
)
|
|
180
|
+
|
|
161
181
|
async def init(self) -> None:
|
|
162
182
|
"""Initialize the Actor instance.
|
|
163
183
|
|
|
@@ -172,18 +192,19 @@ class _ActorType:
|
|
|
172
192
|
if self._is_initialized:
|
|
173
193
|
raise RuntimeError('The Actor was already initialized!')
|
|
174
194
|
|
|
175
|
-
|
|
176
|
-
|
|
195
|
+
self._is_exiting = False
|
|
196
|
+
self._was_final_persist_state_emitted = False
|
|
177
197
|
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
service_container.set_default_storage_client_type('local')
|
|
198
|
+
# If the Actor is running on the Apify platform, we set the cloud storage client.
|
|
199
|
+
if self.is_at_home():
|
|
200
|
+
service_locator.set_storage_client(self._cloud_storage_client)
|
|
182
201
|
|
|
183
|
-
|
|
202
|
+
service_locator.set_event_manager(self.event_manager)
|
|
203
|
+
service_locator.set_configuration(self.configuration)
|
|
184
204
|
|
|
185
|
-
|
|
186
|
-
self.
|
|
205
|
+
# The logging configuration has to be called after all service_locator set methods.
|
|
206
|
+
if self._configure_logging:
|
|
207
|
+
_configure_logging()
|
|
187
208
|
|
|
188
209
|
self.log.info('Initializing Actor...')
|
|
189
210
|
self.log.info('System info', extra=get_system_info())
|
|
@@ -233,7 +254,6 @@ class _ActorType:
|
|
|
233
254
|
await self._event_manager.wait_for_all_listeners_to_complete(timeout=event_listeners_timeout)
|
|
234
255
|
|
|
235
256
|
await self._event_manager.__aexit__(None, None, None)
|
|
236
|
-
cast(dict, service_container._services).clear() # noqa: SLF001
|
|
237
257
|
|
|
238
258
|
await asyncio.wait_for(finalize(), cleanup_timeout.total_seconds())
|
|
239
259
|
self._is_initialized = False
|
|
@@ -335,12 +355,15 @@ class _ActorType:
|
|
|
335
355
|
An instance of the `Dataset` class for the given ID or name.
|
|
336
356
|
"""
|
|
337
357
|
self._raise_if_not_initialized()
|
|
358
|
+
self._raise_if_cloud_requested_but_not_configured(force_cloud=force_cloud)
|
|
359
|
+
|
|
360
|
+
storage_client = self._cloud_storage_client if force_cloud else self._local_storage_client
|
|
338
361
|
|
|
339
362
|
return await Dataset.open(
|
|
340
363
|
id=id,
|
|
341
364
|
name=name,
|
|
342
365
|
configuration=self._configuration,
|
|
343
|
-
storage_client=
|
|
366
|
+
storage_client=storage_client,
|
|
344
367
|
)
|
|
345
368
|
|
|
346
369
|
async def open_key_value_store(
|
|
@@ -367,12 +390,14 @@ class _ActorType:
|
|
|
367
390
|
An instance of the `KeyValueStore` class for the given ID or name.
|
|
368
391
|
"""
|
|
369
392
|
self._raise_if_not_initialized()
|
|
393
|
+
self._raise_if_cloud_requested_but_not_configured(force_cloud=force_cloud)
|
|
394
|
+
storage_client = self._cloud_storage_client if force_cloud else self._local_storage_client
|
|
370
395
|
|
|
371
396
|
return await KeyValueStore.open(
|
|
372
397
|
id=id,
|
|
373
398
|
name=name,
|
|
374
399
|
configuration=self._configuration,
|
|
375
|
-
storage_client=
|
|
400
|
+
storage_client=storage_client,
|
|
376
401
|
)
|
|
377
402
|
|
|
378
403
|
async def open_request_queue(
|
|
@@ -401,12 +426,15 @@ class _ActorType:
|
|
|
401
426
|
An instance of the `RequestQueue` class for the given ID or name.
|
|
402
427
|
"""
|
|
403
428
|
self._raise_if_not_initialized()
|
|
429
|
+
self._raise_if_cloud_requested_but_not_configured(force_cloud=force_cloud)
|
|
430
|
+
|
|
431
|
+
storage_client = self._cloud_storage_client if force_cloud else self._local_storage_client
|
|
404
432
|
|
|
405
433
|
return await RequestQueue.open(
|
|
406
434
|
id=id,
|
|
407
435
|
name=name,
|
|
408
436
|
configuration=self._configuration,
|
|
409
|
-
storage_client=
|
|
437
|
+
storage_client=storage_client,
|
|
410
438
|
)
|
|
411
439
|
|
|
412
440
|
async def push_data(self, data: dict | list[dict]) -> None:
|
|
@@ -826,12 +854,32 @@ class _ActorType:
|
|
|
826
854
|
self.log.error('Actor.reboot() is only supported when running on the Apify platform.')
|
|
827
855
|
return
|
|
828
856
|
|
|
857
|
+
if self._is_rebooting:
|
|
858
|
+
self.log.debug('Actor is already rebooting, skipping the additional reboot call.')
|
|
859
|
+
return
|
|
860
|
+
|
|
861
|
+
self._is_rebooting = True
|
|
862
|
+
|
|
829
863
|
if not custom_after_sleep:
|
|
830
864
|
custom_after_sleep = self._configuration.metamorph_after_sleep
|
|
831
865
|
|
|
832
|
-
|
|
866
|
+
# Call all the listeners for the PERSIST_STATE and MIGRATING events, and wait for them to finish.
|
|
867
|
+
# PERSIST_STATE listeners are called to allow the Actor to persist its state before the reboot.
|
|
868
|
+
# MIGRATING listeners are called to allow the Actor to gracefully stop in-progress tasks before the reboot.
|
|
869
|
+
# Typically, crawlers are listening for the MIGRATING event to stop processing new requests.
|
|
870
|
+
# We can't just emit the events and wait for all listeners to finish,
|
|
871
|
+
# because this method might be called from an event listener itself, and we would deadlock.
|
|
872
|
+
persist_state_listeners = flatten(
|
|
873
|
+
(self._event_manager._listeners_to_wrappers[Event.PERSIST_STATE] or {}).values() # noqa: SLF001
|
|
874
|
+
)
|
|
875
|
+
migrating_listeners = flatten(
|
|
876
|
+
(self._event_manager._listeners_to_wrappers[Event.MIGRATING] or {}).values() # noqa: SLF001
|
|
877
|
+
)
|
|
833
878
|
|
|
834
|
-
await
|
|
879
|
+
await asyncio.gather(
|
|
880
|
+
*[listener(EventPersistStateData(is_migrating=True)) for listener in persist_state_listeners],
|
|
881
|
+
*[listener(EventMigratingData()) for listener in migrating_listeners],
|
|
882
|
+
)
|
|
835
883
|
|
|
836
884
|
if not self._configuration.actor_run_id:
|
|
837
885
|
raise RuntimeError('actor_run_id cannot be None when running on the Apify platform.')
|
|
@@ -928,7 +976,7 @@ class _ActorType:
|
|
|
928
976
|
password: str | None = None,
|
|
929
977
|
groups: list[str] | None = None,
|
|
930
978
|
country_code: str | None = None,
|
|
931
|
-
proxy_urls: list[str] | None = None,
|
|
979
|
+
proxy_urls: list[str | None] | None = None,
|
|
932
980
|
new_url_function: _NewUrlFunction | None = None,
|
|
933
981
|
) -> ProxyConfiguration | None:
|
|
934
982
|
"""Create a ProxyConfiguration object with the passed proxy configuration.
|
apify/_configuration.py
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
from datetime import datetime, timedelta
|
|
4
|
-
from
|
|
4
|
+
from logging import getLogger
|
|
5
|
+
from typing import Annotated, Any
|
|
5
6
|
|
|
6
7
|
from pydantic import AliasChoices, BeforeValidator, Field
|
|
7
8
|
from typing_extensions import deprecated
|
|
@@ -12,6 +13,16 @@ from crawlee.configuration import Configuration as CrawleeConfiguration
|
|
|
12
13
|
|
|
13
14
|
from apify._utils import docs_group
|
|
14
15
|
|
|
16
|
+
logger = getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _transform_to_list(value: Any) -> list[str] | None:
|
|
20
|
+
if value is None:
|
|
21
|
+
return None
|
|
22
|
+
if not value:
|
|
23
|
+
return []
|
|
24
|
+
return value if isinstance(value, list) else str(value).split(',')
|
|
25
|
+
|
|
15
26
|
|
|
16
27
|
@docs_group('Classes')
|
|
17
28
|
class Configuration(CrawleeConfiguration):
|
|
@@ -33,6 +44,13 @@ class Configuration(CrawleeConfiguration):
|
|
|
33
44
|
),
|
|
34
45
|
] = None
|
|
35
46
|
|
|
47
|
+
actor_full_name: Annotated[
|
|
48
|
+
str | None,
|
|
49
|
+
Field(
|
|
50
|
+
description='Full name of the Actor',
|
|
51
|
+
),
|
|
52
|
+
] = None
|
|
53
|
+
|
|
36
54
|
actor_run_id: Annotated[
|
|
37
55
|
str | None,
|
|
38
56
|
Field(
|
|
@@ -67,6 +85,14 @@ class Configuration(CrawleeConfiguration):
|
|
|
67
85
|
),
|
|
68
86
|
] = None
|
|
69
87
|
|
|
88
|
+
actor_build_tags: Annotated[
|
|
89
|
+
list[str] | None,
|
|
90
|
+
Field(
|
|
91
|
+
description='Build tags of the Actor build used in the run',
|
|
92
|
+
),
|
|
93
|
+
BeforeValidator(_transform_to_list),
|
|
94
|
+
] = None
|
|
95
|
+
|
|
70
96
|
actor_task_id: Annotated[
|
|
71
97
|
str | None,
|
|
72
98
|
Field(
|
|
@@ -185,6 +211,15 @@ class Configuration(CrawleeConfiguration):
|
|
|
185
211
|
BeforeValidator(lambda val: val or None),
|
|
186
212
|
] = None
|
|
187
213
|
|
|
214
|
+
max_total_charge_usd: Annotated[
|
|
215
|
+
float | None,
|
|
216
|
+
Field(
|
|
217
|
+
alias='actor_max_total_charge_usd',
|
|
218
|
+
description='For pay-per-event Actors, the user-set limit on total charges. Do not exceed this limit',
|
|
219
|
+
),
|
|
220
|
+
BeforeValidator(lambda val: val or None),
|
|
221
|
+
] = None
|
|
222
|
+
|
|
188
223
|
meta_origin: Annotated[
|
|
189
224
|
str | None,
|
|
190
225
|
Field(
|
|
@@ -321,6 +356,15 @@ class Configuration(CrawleeConfiguration):
|
|
|
321
356
|
),
|
|
322
357
|
] = None
|
|
323
358
|
|
|
359
|
+
@classmethod
|
|
360
|
+
def get_global_configuration(cls) -> Configuration:
|
|
361
|
+
"""Retrieve the global instance of the configuration.
|
|
362
|
+
|
|
363
|
+
Mostly for backwards compatibility. It is recommended to use `service_locator.get_configuration()`
|
|
364
|
+
instead.
|
|
365
|
+
"""
|
|
366
|
+
return cls()
|
|
367
|
+
|
|
324
368
|
|
|
325
369
|
# Monkey-patch the base class so that it works with the extended configuration
|
|
326
370
|
CrawleeConfiguration.get_global_configuration = Configuration.get_global_configuration # type: ignore[method-assign]
|
apify/_proxy_configuration.py
CHANGED
|
@@ -111,9 +111,9 @@ class ProxyConfiguration(CrawleeProxyConfiguration):
|
|
|
111
111
|
password: str | None = None,
|
|
112
112
|
groups: list[str] | None = None,
|
|
113
113
|
country_code: str | None = None,
|
|
114
|
-
proxy_urls: list[str] | None = None,
|
|
114
|
+
proxy_urls: list[str | None] | None = None,
|
|
115
115
|
new_url_function: _NewUrlFunction | None = None,
|
|
116
|
-
tiered_proxy_urls: list[list[str]] | None = None,
|
|
116
|
+
tiered_proxy_urls: list[list[str | None]] | None = None,
|
|
117
117
|
_actor_config: Configuration | None = None,
|
|
118
118
|
_apify_client: ApifyClientAsync | None = None,
|
|
119
119
|
) -> None:
|
|
@@ -148,7 +148,7 @@ class ProxyConfiguration(CrawleeProxyConfiguration):
|
|
|
148
148
|
' "groups" or "country_code".'
|
|
149
149
|
)
|
|
150
150
|
|
|
151
|
-
if proxy_urls and any('apify.com' in url for url in proxy_urls):
|
|
151
|
+
if proxy_urls and any('apify.com' in (url or '') for url in proxy_urls):
|
|
152
152
|
logger.warning(
|
|
153
153
|
'Some Apify proxy features may work incorrectly. Please consider setting up Apify properties '
|
|
154
154
|
'instead of `proxy_urls`.\n'
|
|
@@ -1,10 +1,13 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING
|
|
4
|
+
|
|
1
5
|
from typing_extensions import override
|
|
2
6
|
|
|
3
7
|
from apify_client import ApifyClientAsync
|
|
4
8
|
from crawlee._utils.crypto import crypto_random_object_id
|
|
5
|
-
from crawlee.
|
|
9
|
+
from crawlee.storage_clients import BaseStorageClient
|
|
6
10
|
|
|
7
|
-
from apify._configuration import Configuration
|
|
8
11
|
from apify._utils import docs_group
|
|
9
12
|
from apify.apify_storage_client._dataset_client import DatasetClient
|
|
10
13
|
from apify.apify_storage_client._dataset_collection_client import DatasetCollectionClient
|
|
@@ -13,6 +16,9 @@ from apify.apify_storage_client._key_value_store_collection_client import KeyVal
|
|
|
13
16
|
from apify.apify_storage_client._request_queue_client import RequestQueueClient
|
|
14
17
|
from apify.apify_storage_client._request_queue_collection_client import RequestQueueCollectionClient
|
|
15
18
|
|
|
19
|
+
if TYPE_CHECKING:
|
|
20
|
+
from apify._configuration import Configuration
|
|
21
|
+
|
|
16
22
|
|
|
17
23
|
@docs_group('Classes')
|
|
18
24
|
class ApifyStorageClient(BaseStorageClient):
|
|
@@ -29,6 +35,10 @@ class ApifyStorageClient(BaseStorageClient):
|
|
|
29
35
|
)
|
|
30
36
|
self._configuration = configuration
|
|
31
37
|
|
|
38
|
+
@classmethod
|
|
39
|
+
def from_config(cls, config: Configuration) -> ApifyStorageClient:
|
|
40
|
+
return cls(configuration=config)
|
|
41
|
+
|
|
32
42
|
@override
|
|
33
43
|
def dataset(self, id: str) -> DatasetClient:
|
|
34
44
|
return DatasetClient(self._apify_client.dataset(id))
|
|
@@ -4,7 +4,8 @@ from typing import TYPE_CHECKING
|
|
|
4
4
|
|
|
5
5
|
from typing_extensions import override
|
|
6
6
|
|
|
7
|
-
from crawlee.
|
|
7
|
+
from crawlee.storage_clients._base import BaseDatasetClient
|
|
8
|
+
from crawlee.storage_clients.models import DatasetItemsListPage, DatasetMetadata
|
|
8
9
|
|
|
9
10
|
if TYPE_CHECKING:
|
|
10
11
|
from collections.abc import AsyncIterator
|
|
@@ -4,7 +4,8 @@ from typing import TYPE_CHECKING
|
|
|
4
4
|
|
|
5
5
|
from typing_extensions import override
|
|
6
6
|
|
|
7
|
-
from crawlee.
|
|
7
|
+
from crawlee.storage_clients._base import BaseDatasetCollectionClient
|
|
8
|
+
from crawlee.storage_clients.models import DatasetListPage, DatasetMetadata
|
|
8
9
|
|
|
9
10
|
if TYPE_CHECKING:
|
|
10
11
|
from apify_client.clients import DatasetCollectionClientAsync
|
|
@@ -5,12 +5,8 @@ from typing import TYPE_CHECKING, Any
|
|
|
5
5
|
|
|
6
6
|
from typing_extensions import override
|
|
7
7
|
|
|
8
|
-
from crawlee.
|
|
9
|
-
|
|
10
|
-
KeyValueStoreListKeysPage,
|
|
11
|
-
KeyValueStoreMetadata,
|
|
12
|
-
KeyValueStoreRecord,
|
|
13
|
-
)
|
|
8
|
+
from crawlee.storage_clients._base import BaseKeyValueStoreClient
|
|
9
|
+
from crawlee.storage_clients.models import KeyValueStoreListKeysPage, KeyValueStoreMetadata, KeyValueStoreRecord
|
|
14
10
|
|
|
15
11
|
if TYPE_CHECKING:
|
|
16
12
|
from collections.abc import AsyncIterator
|
|
@@ -4,7 +4,8 @@ from typing import TYPE_CHECKING
|
|
|
4
4
|
|
|
5
5
|
from typing_extensions import override
|
|
6
6
|
|
|
7
|
-
from crawlee.
|
|
7
|
+
from crawlee.storage_clients._base import BaseKeyValueStoreCollectionClient
|
|
8
|
+
from crawlee.storage_clients.models import KeyValueStoreListPage, KeyValueStoreMetadata
|
|
8
9
|
|
|
9
10
|
if TYPE_CHECKING:
|
|
10
11
|
from apify_client.clients import KeyValueStoreCollectionClientAsync
|
|
@@ -5,8 +5,8 @@ from typing import TYPE_CHECKING
|
|
|
5
5
|
from typing_extensions import override
|
|
6
6
|
|
|
7
7
|
from crawlee import Request
|
|
8
|
-
from crawlee.
|
|
9
|
-
|
|
8
|
+
from crawlee.storage_clients._base import BaseRequestQueueClient
|
|
9
|
+
from crawlee.storage_clients.models import (
|
|
10
10
|
BatchRequestsOperationResponse,
|
|
11
11
|
ProcessedRequest,
|
|
12
12
|
ProlongRequestLockResponse,
|
|
@@ -80,10 +80,6 @@ class RequestQueueClient(BaseRequestQueueClient):
|
|
|
80
80
|
by_alias=True,
|
|
81
81
|
exclude={
|
|
82
82
|
'id',
|
|
83
|
-
'json_',
|
|
84
|
-
'order_no',
|
|
85
|
-
'query_params',
|
|
86
|
-
'data',
|
|
87
83
|
},
|
|
88
84
|
),
|
|
89
85
|
forefront=forefront,
|
|
@@ -107,12 +103,6 @@ class RequestQueueClient(BaseRequestQueueClient):
|
|
|
107
103
|
| await self._client.update_request(
|
|
108
104
|
request=request.model_dump(
|
|
109
105
|
by_alias=True,
|
|
110
|
-
exclude={
|
|
111
|
-
'json_',
|
|
112
|
-
'order_no',
|
|
113
|
-
'query_params',
|
|
114
|
-
'data',
|
|
115
|
-
},
|
|
116
106
|
),
|
|
117
107
|
forefront=forefront,
|
|
118
108
|
)
|
|
@@ -164,10 +154,6 @@ class RequestQueueClient(BaseRequestQueueClient):
|
|
|
164
154
|
by_alias=True,
|
|
165
155
|
exclude={
|
|
166
156
|
'id',
|
|
167
|
-
'json_',
|
|
168
|
-
'order_no',
|
|
169
|
-
'query_params',
|
|
170
|
-
'data',
|
|
171
157
|
},
|
|
172
158
|
)
|
|
173
159
|
for r in requests
|
|
@@ -183,12 +169,6 @@ class RequestQueueClient(BaseRequestQueueClient):
|
|
|
183
169
|
requests=[
|
|
184
170
|
r.model_dump(
|
|
185
171
|
by_alias=True,
|
|
186
|
-
exclude={
|
|
187
|
-
'json_',
|
|
188
|
-
'order_no',
|
|
189
|
-
'query_params',
|
|
190
|
-
'data',
|
|
191
|
-
},
|
|
192
172
|
)
|
|
193
173
|
for r in requests
|
|
194
174
|
],
|
|
@@ -4,7 +4,8 @@ from typing import TYPE_CHECKING
|
|
|
4
4
|
|
|
5
5
|
from typing_extensions import override
|
|
6
6
|
|
|
7
|
-
from crawlee.
|
|
7
|
+
from crawlee.storage_clients._base import BaseRequestQueueCollectionClient
|
|
8
|
+
from crawlee.storage_clients.models import RequestQueueListPage, RequestQueueMetadata
|
|
8
9
|
|
|
9
10
|
if TYPE_CHECKING:
|
|
10
11
|
from apify_client.clients import RequestQueueCollectionClientAsync
|
apify/log.py
CHANGED
|
@@ -1,14 +1,10 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
|
-
from typing import TYPE_CHECKING
|
|
5
4
|
|
|
6
5
|
from apify_shared.utils import ignore_docs
|
|
7
6
|
from crawlee._log_config import CrawleeLogFormatter, configure_logger, get_configured_log_level
|
|
8
7
|
|
|
9
|
-
if TYPE_CHECKING:
|
|
10
|
-
from apify import Configuration
|
|
11
|
-
|
|
12
8
|
# Name of the logger used throughout the library (resolves to 'apify')
|
|
13
9
|
logger_name = __name__.split('.')[0]
|
|
14
10
|
|
|
@@ -21,11 +17,11 @@ class ActorLogFormatter(CrawleeLogFormatter): # noqa: D101 (Inherited from pare
|
|
|
21
17
|
pass
|
|
22
18
|
|
|
23
19
|
|
|
24
|
-
def _configure_logging(
|
|
20
|
+
def _configure_logging() -> None:
|
|
25
21
|
apify_client_logger = logging.getLogger('apify_client')
|
|
26
|
-
configure_logger(apify_client_logger,
|
|
22
|
+
configure_logger(apify_client_logger, remove_old_handlers=True)
|
|
27
23
|
|
|
28
|
-
level = get_configured_log_level(
|
|
24
|
+
level = get_configured_log_level()
|
|
29
25
|
|
|
30
26
|
# Keep apify_client logger quiet unless debug logging is requested
|
|
31
27
|
if level > logging.DEBUG:
|
|
@@ -42,4 +38,4 @@ def _configure_logging(configuration: Configuration) -> None:
|
|
|
42
38
|
|
|
43
39
|
# Use configured log level for apify logger
|
|
44
40
|
apify_logger = logging.getLogger('apify')
|
|
45
|
-
configure_logger(apify_logger,
|
|
41
|
+
configure_logger(apify_logger, remove_old_handlers=True)
|
apify/storages/_request_list.py
CHANGED
|
@@ -11,7 +11,7 @@ from pydantic import BaseModel, Field, TypeAdapter
|
|
|
11
11
|
from crawlee import Request
|
|
12
12
|
from crawlee._types import HttpMethod
|
|
13
13
|
from crawlee.http_clients import BaseHttpClient, HttpxHttpClient
|
|
14
|
-
from crawlee.
|
|
14
|
+
from crawlee.request_loaders import RequestList as CrawleeRequestList
|
|
15
15
|
|
|
16
16
|
from apify._utils import docs_group
|
|
17
17
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
2
|
Name: apify
|
|
3
|
-
Version: 2.
|
|
3
|
+
Version: 2.2.0
|
|
4
4
|
Summary: Apify SDK for Python
|
|
5
5
|
License: Apache-2.0
|
|
6
6
|
Keywords: apify,sdk,automation,chrome,crawlee,crawler,headless,scraper,scraping
|
|
@@ -20,11 +20,12 @@ Classifier: Programming Language :: Python :: 3.13
|
|
|
20
20
|
Classifier: Topic :: Software Development :: Libraries
|
|
21
21
|
Provides-Extra: scrapy
|
|
22
22
|
Requires-Dist: apify-client (>=1.8.1)
|
|
23
|
-
Requires-Dist: apify-shared (>=1.1
|
|
24
|
-
Requires-Dist: crawlee (>=0.
|
|
23
|
+
Requires-Dist: apify-shared (>=1.2.1)
|
|
24
|
+
Requires-Dist: crawlee (>=0.5.1,<0.6.0)
|
|
25
25
|
Requires-Dist: cryptography (>=42.0.0)
|
|
26
|
-
Requires-Dist: httpx (>=0.27.0
|
|
26
|
+
Requires-Dist: httpx (>=0.27.0)
|
|
27
27
|
Requires-Dist: lazy-object-proxy (>=1.10.0)
|
|
28
|
+
Requires-Dist: more_itertools (>=10.2.0)
|
|
28
29
|
Requires-Dist: scrapy (>=2.11.0) ; extra == "scrapy"
|
|
29
30
|
Requires-Dist: typing-extensions (>=4.1.0)
|
|
30
31
|
Requires-Dist: websockets (>=10.0,<14.0.0)
|
|
@@ -1,22 +1,22 @@
|
|
|
1
1
|
apify/__init__.py,sha256=99ynaDWBLEcCjdLq7R0Exy_iACsXiXoQ8VUZKmbzTeM,550
|
|
2
|
-
apify/_actor.py,sha256=
|
|
3
|
-
apify/_configuration.py,sha256=
|
|
2
|
+
apify/_actor.py,sha256=X8UCTYVJmsq0Nus7aru1ayEgrbo8QE__0jKaN8aWMZ0,44313
|
|
3
|
+
apify/_configuration.py,sha256=T3Z_o_W98iSyTbrutfb578yW51aexZ_V0FcLwTxFLjI,10878
|
|
4
4
|
apify/_consts.py,sha256=_Xq4hOfOA1iZ3n1P967YWdyncKivpbX6RTlp_qanUoE,330
|
|
5
5
|
apify/_crypto.py,sha256=e0_aM3l9_5Osk-jszYOOjrAKK60OggSHbiw5c30QnsU,5638
|
|
6
6
|
apify/_models.py,sha256=Btlz-23obKY5tJ75JnUwkVNC2lmU1IEBbdU3HvWaVhg,5748
|
|
7
7
|
apify/_platform_event_manager.py,sha256=44xyV0Lpzf4h4VZ0rkyYg_nhbQkEONNor8_Z9gIKO40,7899
|
|
8
|
-
apify/_proxy_configuration.py,sha256=
|
|
8
|
+
apify/_proxy_configuration.py,sha256=c-O6_PZ9pUD-i4J0RFEKTtfyJPP2rTRJJA1TH8NVsV8,13189
|
|
9
9
|
apify/_utils.py,sha256=CCLkpAsZKp00ykm88Z_Fbck5PNT0j6mJYOuD0RxzZUs,1620
|
|
10
10
|
apify/apify_storage_client/__init__.py,sha256=-UbR68bFsDR6ln8OFs4t50eqcnY36hujO-SeOt-KmcA,114
|
|
11
|
-
apify/apify_storage_client/_apify_storage_client.py,sha256=
|
|
12
|
-
apify/apify_storage_client/_dataset_client.py,sha256=
|
|
13
|
-
apify/apify_storage_client/_dataset_collection_client.py,sha256=
|
|
14
|
-
apify/apify_storage_client/_key_value_store_client.py,sha256=
|
|
15
|
-
apify/apify_storage_client/_key_value_store_collection_client.py,sha256=
|
|
16
|
-
apify/apify_storage_client/_request_queue_client.py,sha256=
|
|
17
|
-
apify/apify_storage_client/_request_queue_collection_client.py,sha256=
|
|
11
|
+
apify/apify_storage_client/_apify_storage_client.py,sha256=0rS75JoRHt7stRYS9-oqm3DmaSIZQN5C11N5MZQUvlA,2616
|
|
12
|
+
apify/apify_storage_client/_dataset_client.py,sha256=UUodnR_MQBg5RkURrfegkGJWR5OmdPPgPfGepvkdQoU,5580
|
|
13
|
+
apify/apify_storage_client/_dataset_collection_client.py,sha256=qCcKZlA0bkO-sL7xED0Yose85NlrRa9AKr4oCSrYX6k,1489
|
|
14
|
+
apify/apify_storage_client/_key_value_store_client.py,sha256=MSuoIeqEHLu92WfUU7kyB3Cc_gKUlm8TghnU3_xkPtE,3363
|
|
15
|
+
apify/apify_storage_client/_key_value_store_collection_client.py,sha256=NxD-3XDJP6JGMDyIa6ib0gl8op7rQjSQ0vlToCiV190,1563
|
|
16
|
+
apify/apify_storage_client/_request_queue_client.py,sha256=n-CR-hA5LM6_8IwiMwQ9tT2juavq7X2zC3ZNlrtv-2s,5156
|
|
17
|
+
apify/apify_storage_client/_request_queue_collection_client.py,sha256=MdzgbQb2D8rHWpUlPCrQSHRlAi0fI0PSZ9bYagr-MhY,1571
|
|
18
18
|
apify/apify_storage_client/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
19
|
-
apify/log.py,sha256=
|
|
19
|
+
apify/log.py,sha256=j-E4t-WeA93bc1NCQRG8sTntehQCiiN8ia-MdQe3_Ts,1291
|
|
20
20
|
apify/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
21
21
|
apify/scrapy/__init__.py,sha256=HE5wCN7-DZKPydLCOvjNyLuL3CvN2fUFweXfrDfe1Ss,348
|
|
22
22
|
apify/scrapy/middlewares/__init__.py,sha256=tfW-d3WFWLeNEjL8fTmon6NwgD-OXx1Bw2fBdU-wPy4,114
|
|
@@ -30,9 +30,9 @@ apify/scrapy/requests.py,sha256=yZ9hIsz2YyqOoOwzN9F1h76wG4qwawrI6h_6xq0I7Iw,7599
|
|
|
30
30
|
apify/scrapy/scheduler.py,sha256=03kZxejWWb-TofJ-vpSZuQ28rT-qNjhhpC-QeO2OzoU,5977
|
|
31
31
|
apify/scrapy/utils.py,sha256=758DcHCSAgCTProY0QX74uJ1XrzVsQwvCmFanj2f_3Q,2928
|
|
32
32
|
apify/storages/__init__.py,sha256=FW-z6ubuPnHGM-Wp15T8mR5q6lnpDGrCW-IkgZd5L30,177
|
|
33
|
-
apify/storages/_request_list.py,sha256
|
|
33
|
+
apify/storages/_request_list.py,sha256=-lZJcE5nq69aJhGFJ7Sh2ctqgAWUDyOwYm5_0y1hdAE,5865
|
|
34
34
|
apify/storages/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
35
|
-
apify-2.
|
|
36
|
-
apify-2.
|
|
37
|
-
apify-2.
|
|
38
|
-
apify-2.
|
|
35
|
+
apify-2.2.0.dist-info/LICENSE,sha256=AsFjHssKjj4LGd2ZCqXn6FBzMqcWdjQre1byPPSypVw,11355
|
|
36
|
+
apify-2.2.0.dist-info/METADATA,sha256=tHrpUBsJOFuCKy1OorldVFCBUYbGw82xxC1BwcbRzvg,8711
|
|
37
|
+
apify-2.2.0.dist-info/WHEEL,sha256=RaoafKOydTQ7I_I3JTrPCg6kUmTgtm4BornzOqyEfJ8,88
|
|
38
|
+
apify-2.2.0.dist-info/RECORD,,
|
|
File without changes
|