apify 2.2.0__py3-none-any.whl → 2.2.0b1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of apify might be problematic. Click here for more details.
- apify/_actor.py +26 -74
- apify/_configuration.py +0 -12
- apify/_proxy_configuration.py +3 -3
- apify/apify_storage_client/_apify_storage_client.py +2 -12
- apify/apify_storage_client/_dataset_client.py +1 -2
- apify/apify_storage_client/_dataset_collection_client.py +1 -2
- apify/apify_storage_client/_key_value_store_client.py +6 -2
- apify/apify_storage_client/_key_value_store_collection_client.py +1 -2
- apify/apify_storage_client/_request_queue_client.py +22 -2
- apify/apify_storage_client/_request_queue_collection_client.py +1 -2
- apify/log.py +8 -4
- apify/storages/_request_list.py +1 -1
- {apify-2.2.0.dist-info → apify-2.2.0b1.dist-info}/METADATA +4 -5
- {apify-2.2.0.dist-info → apify-2.2.0b1.dist-info}/RECORD +16 -16
- {apify-2.2.0.dist-info → apify-2.2.0b1.dist-info}/WHEEL +1 -1
- {apify-2.2.0.dist-info → apify-2.2.0b1.dist-info}/LICENSE +0 -0
apify/_actor.py
CHANGED
|
@@ -7,14 +7,13 @@ from datetime import timedelta
|
|
|
7
7
|
from typing import TYPE_CHECKING, Any, Callable, TypeVar, cast
|
|
8
8
|
|
|
9
9
|
from lazy_object_proxy import Proxy
|
|
10
|
-
from more_itertools import flatten
|
|
11
10
|
from pydantic import AliasChoices
|
|
12
11
|
|
|
13
12
|
from apify_client import ApifyClientAsync
|
|
14
13
|
from apify_shared.consts import ActorEnvVars, ActorExitCodes, ApifyEnvVars
|
|
15
14
|
from apify_shared.utils import ignore_docs, maybe_extract_enum_member_value
|
|
16
|
-
from crawlee import
|
|
17
|
-
from crawlee.events._types import Event,
|
|
15
|
+
from crawlee import service_container
|
|
16
|
+
from crawlee.events._types import Event, EventPersistStateData
|
|
18
17
|
|
|
19
18
|
from apify._configuration import Configuration
|
|
20
19
|
from apify._consts import EVENT_LISTENERS_TIMEOUT
|
|
@@ -34,7 +33,6 @@ if TYPE_CHECKING:
|
|
|
34
33
|
from typing_extensions import Self
|
|
35
34
|
|
|
36
35
|
from crawlee.proxy_configuration import _NewUrlFunction
|
|
37
|
-
from crawlee.storage_clients import BaseStorageClient
|
|
38
36
|
|
|
39
37
|
from apify._models import Webhook
|
|
40
38
|
|
|
@@ -50,7 +48,6 @@ class _ActorType:
|
|
|
50
48
|
_apify_client: ApifyClientAsync
|
|
51
49
|
_configuration: Configuration
|
|
52
50
|
_is_exiting = False
|
|
53
|
-
_is_rebooting = False
|
|
54
51
|
|
|
55
52
|
def __init__(
|
|
56
53
|
self,
|
|
@@ -72,22 +69,17 @@ class _ActorType:
|
|
|
72
69
|
self._configure_logging = configure_logging
|
|
73
70
|
self._apify_client = self.new_client()
|
|
74
71
|
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
# Set the event manager based on whether the Actor is running on the platform or locally.
|
|
80
|
-
self._event_manager = (
|
|
81
|
-
PlatformEventManager(
|
|
72
|
+
self._event_manager: EventManager
|
|
73
|
+
if self._configuration.is_at_home:
|
|
74
|
+
self._event_manager = PlatformEventManager(
|
|
82
75
|
config=self._configuration,
|
|
83
76
|
persist_state_interval=self._configuration.persist_state_interval,
|
|
84
77
|
)
|
|
85
|
-
|
|
86
|
-
|
|
78
|
+
else:
|
|
79
|
+
self._event_manager = LocalEventManager(
|
|
87
80
|
system_info_interval=self._configuration.system_info_interval,
|
|
88
81
|
persist_state_interval=self._configuration.persist_state_interval,
|
|
89
82
|
)
|
|
90
|
-
)
|
|
91
83
|
|
|
92
84
|
self._is_initialized = False
|
|
93
85
|
|
|
@@ -100,6 +92,9 @@ class _ActorType:
|
|
|
100
92
|
When you exit the `async with` block, the `Actor.exit()` method is called, and if any exception happens while
|
|
101
93
|
executing the block code, the `Actor.fail` method is called.
|
|
102
94
|
"""
|
|
95
|
+
if self._configure_logging:
|
|
96
|
+
_configure_logging(self._configuration)
|
|
97
|
+
|
|
103
98
|
await self.init()
|
|
104
99
|
return self
|
|
105
100
|
|
|
@@ -159,25 +154,10 @@ class _ActorType:
|
|
|
159
154
|
"""The logging.Logger instance the Actor uses."""
|
|
160
155
|
return logger
|
|
161
156
|
|
|
162
|
-
@property
|
|
163
|
-
def _local_storage_client(self) -> BaseStorageClient:
|
|
164
|
-
"""The local storage client the Actor instance uses."""
|
|
165
|
-
return service_locator.get_storage_client()
|
|
166
|
-
|
|
167
157
|
def _raise_if_not_initialized(self) -> None:
|
|
168
158
|
if not self._is_initialized:
|
|
169
159
|
raise RuntimeError('The Actor was not initialized!')
|
|
170
160
|
|
|
171
|
-
def _raise_if_cloud_requested_but_not_configured(self, *, force_cloud: bool) -> None:
|
|
172
|
-
if not force_cloud:
|
|
173
|
-
return
|
|
174
|
-
|
|
175
|
-
if not self.is_at_home() and self.config.token is None:
|
|
176
|
-
raise RuntimeError(
|
|
177
|
-
'In order to use the Apify cloud storage from your computer, '
|
|
178
|
-
'you need to provide an Apify token using the APIFY_TOKEN environment variable.'
|
|
179
|
-
)
|
|
180
|
-
|
|
181
161
|
async def init(self) -> None:
|
|
182
162
|
"""Initialize the Actor instance.
|
|
183
163
|
|
|
@@ -192,19 +172,18 @@ class _ActorType:
|
|
|
192
172
|
if self._is_initialized:
|
|
193
173
|
raise RuntimeError('The Actor was already initialized!')
|
|
194
174
|
|
|
195
|
-
self.
|
|
196
|
-
|
|
175
|
+
if self._configuration.token:
|
|
176
|
+
service_container.set_cloud_storage_client(ApifyStorageClient(configuration=self._configuration))
|
|
197
177
|
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
178
|
+
if self._configuration.is_at_home:
|
|
179
|
+
service_container.set_default_storage_client_type('cloud')
|
|
180
|
+
else:
|
|
181
|
+
service_container.set_default_storage_client_type('local')
|
|
201
182
|
|
|
202
|
-
|
|
203
|
-
service_locator.set_configuration(self.configuration)
|
|
183
|
+
service_container.set_event_manager(self._event_manager)
|
|
204
184
|
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
_configure_logging()
|
|
185
|
+
self._is_exiting = False
|
|
186
|
+
self._was_final_persist_state_emitted = False
|
|
208
187
|
|
|
209
188
|
self.log.info('Initializing Actor...')
|
|
210
189
|
self.log.info('System info', extra=get_system_info())
|
|
@@ -254,6 +233,7 @@ class _ActorType:
|
|
|
254
233
|
await self._event_manager.wait_for_all_listeners_to_complete(timeout=event_listeners_timeout)
|
|
255
234
|
|
|
256
235
|
await self._event_manager.__aexit__(None, None, None)
|
|
236
|
+
cast(dict, service_container._services).clear() # noqa: SLF001
|
|
257
237
|
|
|
258
238
|
await asyncio.wait_for(finalize(), cleanup_timeout.total_seconds())
|
|
259
239
|
self._is_initialized = False
|
|
@@ -355,15 +335,12 @@ class _ActorType:
|
|
|
355
335
|
An instance of the `Dataset` class for the given ID or name.
|
|
356
336
|
"""
|
|
357
337
|
self._raise_if_not_initialized()
|
|
358
|
-
self._raise_if_cloud_requested_but_not_configured(force_cloud=force_cloud)
|
|
359
|
-
|
|
360
|
-
storage_client = self._cloud_storage_client if force_cloud else self._local_storage_client
|
|
361
338
|
|
|
362
339
|
return await Dataset.open(
|
|
363
340
|
id=id,
|
|
364
341
|
name=name,
|
|
365
342
|
configuration=self._configuration,
|
|
366
|
-
storage_client=
|
|
343
|
+
storage_client=service_container.get_storage_client(client_type='cloud' if force_cloud else None),
|
|
367
344
|
)
|
|
368
345
|
|
|
369
346
|
async def open_key_value_store(
|
|
@@ -390,14 +367,12 @@ class _ActorType:
|
|
|
390
367
|
An instance of the `KeyValueStore` class for the given ID or name.
|
|
391
368
|
"""
|
|
392
369
|
self._raise_if_not_initialized()
|
|
393
|
-
self._raise_if_cloud_requested_but_not_configured(force_cloud=force_cloud)
|
|
394
|
-
storage_client = self._cloud_storage_client if force_cloud else self._local_storage_client
|
|
395
370
|
|
|
396
371
|
return await KeyValueStore.open(
|
|
397
372
|
id=id,
|
|
398
373
|
name=name,
|
|
399
374
|
configuration=self._configuration,
|
|
400
|
-
storage_client=
|
|
375
|
+
storage_client=service_container.get_storage_client(client_type='cloud' if force_cloud else None),
|
|
401
376
|
)
|
|
402
377
|
|
|
403
378
|
async def open_request_queue(
|
|
@@ -426,15 +401,12 @@ class _ActorType:
|
|
|
426
401
|
An instance of the `RequestQueue` class for the given ID or name.
|
|
427
402
|
"""
|
|
428
403
|
self._raise_if_not_initialized()
|
|
429
|
-
self._raise_if_cloud_requested_but_not_configured(force_cloud=force_cloud)
|
|
430
|
-
|
|
431
|
-
storage_client = self._cloud_storage_client if force_cloud else self._local_storage_client
|
|
432
404
|
|
|
433
405
|
return await RequestQueue.open(
|
|
434
406
|
id=id,
|
|
435
407
|
name=name,
|
|
436
408
|
configuration=self._configuration,
|
|
437
|
-
storage_client=
|
|
409
|
+
storage_client=service_container.get_storage_client(client_type='cloud' if force_cloud else None),
|
|
438
410
|
)
|
|
439
411
|
|
|
440
412
|
async def push_data(self, data: dict | list[dict]) -> None:
|
|
@@ -854,32 +826,12 @@ class _ActorType:
|
|
|
854
826
|
self.log.error('Actor.reboot() is only supported when running on the Apify platform.')
|
|
855
827
|
return
|
|
856
828
|
|
|
857
|
-
if self._is_rebooting:
|
|
858
|
-
self.log.debug('Actor is already rebooting, skipping the additional reboot call.')
|
|
859
|
-
return
|
|
860
|
-
|
|
861
|
-
self._is_rebooting = True
|
|
862
|
-
|
|
863
829
|
if not custom_after_sleep:
|
|
864
830
|
custom_after_sleep = self._configuration.metamorph_after_sleep
|
|
865
831
|
|
|
866
|
-
|
|
867
|
-
# PERSIST_STATE listeners are called to allow the Actor to persist its state before the reboot.
|
|
868
|
-
# MIGRATING listeners are called to allow the Actor to gracefully stop in-progress tasks before the reboot.
|
|
869
|
-
# Typically, crawlers are listening for the MIGRATING event to stop processing new requests.
|
|
870
|
-
# We can't just emit the events and wait for all listeners to finish,
|
|
871
|
-
# because this method might be called from an event listener itself, and we would deadlock.
|
|
872
|
-
persist_state_listeners = flatten(
|
|
873
|
-
(self._event_manager._listeners_to_wrappers[Event.PERSIST_STATE] or {}).values() # noqa: SLF001
|
|
874
|
-
)
|
|
875
|
-
migrating_listeners = flatten(
|
|
876
|
-
(self._event_manager._listeners_to_wrappers[Event.MIGRATING] or {}).values() # noqa: SLF001
|
|
877
|
-
)
|
|
832
|
+
self._event_manager.emit(event=Event.PERSIST_STATE, event_data=EventPersistStateData(is_migrating=True))
|
|
878
833
|
|
|
879
|
-
await
|
|
880
|
-
*[listener(EventPersistStateData(is_migrating=True)) for listener in persist_state_listeners],
|
|
881
|
-
*[listener(EventMigratingData()) for listener in migrating_listeners],
|
|
882
|
-
)
|
|
834
|
+
await self._event_manager.__aexit__(None, None, None)
|
|
883
835
|
|
|
884
836
|
if not self._configuration.actor_run_id:
|
|
885
837
|
raise RuntimeError('actor_run_id cannot be None when running on the Apify platform.')
|
|
@@ -976,7 +928,7 @@ class _ActorType:
|
|
|
976
928
|
password: str | None = None,
|
|
977
929
|
groups: list[str] | None = None,
|
|
978
930
|
country_code: str | None = None,
|
|
979
|
-
proxy_urls: list[str
|
|
931
|
+
proxy_urls: list[str] | None = None,
|
|
980
932
|
new_url_function: _NewUrlFunction | None = None,
|
|
981
933
|
) -> ProxyConfiguration | None:
|
|
982
934
|
"""Create a ProxyConfiguration object with the passed proxy configuration.
|
apify/_configuration.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
from datetime import datetime, timedelta
|
|
4
|
-
from logging import getLogger
|
|
5
4
|
from typing import Annotated, Any
|
|
6
5
|
|
|
7
6
|
from pydantic import AliasChoices, BeforeValidator, Field
|
|
@@ -13,8 +12,6 @@ from crawlee.configuration import Configuration as CrawleeConfiguration
|
|
|
13
12
|
|
|
14
13
|
from apify._utils import docs_group
|
|
15
14
|
|
|
16
|
-
logger = getLogger(__name__)
|
|
17
|
-
|
|
18
15
|
|
|
19
16
|
def _transform_to_list(value: Any) -> list[str] | None:
|
|
20
17
|
if value is None:
|
|
@@ -356,15 +353,6 @@ class Configuration(CrawleeConfiguration):
|
|
|
356
353
|
),
|
|
357
354
|
] = None
|
|
358
355
|
|
|
359
|
-
@classmethod
|
|
360
|
-
def get_global_configuration(cls) -> Configuration:
|
|
361
|
-
"""Retrieve the global instance of the configuration.
|
|
362
|
-
|
|
363
|
-
Mostly for the backwards compatibility. It is recommended to use the `service_locator.get_configuration()`
|
|
364
|
-
instead.
|
|
365
|
-
"""
|
|
366
|
-
return cls()
|
|
367
|
-
|
|
368
356
|
|
|
369
357
|
# Monkey-patch the base class so that it works with the extended configuration
|
|
370
358
|
CrawleeConfiguration.get_global_configuration = Configuration.get_global_configuration # type: ignore[method-assign]
|
apify/_proxy_configuration.py
CHANGED
|
@@ -111,9 +111,9 @@ class ProxyConfiguration(CrawleeProxyConfiguration):
|
|
|
111
111
|
password: str | None = None,
|
|
112
112
|
groups: list[str] | None = None,
|
|
113
113
|
country_code: str | None = None,
|
|
114
|
-
proxy_urls: list[str
|
|
114
|
+
proxy_urls: list[str] | None = None,
|
|
115
115
|
new_url_function: _NewUrlFunction | None = None,
|
|
116
|
-
tiered_proxy_urls: list[list[str
|
|
116
|
+
tiered_proxy_urls: list[list[str]] | None = None,
|
|
117
117
|
_actor_config: Configuration | None = None,
|
|
118
118
|
_apify_client: ApifyClientAsync | None = None,
|
|
119
119
|
) -> None:
|
|
@@ -148,7 +148,7 @@ class ProxyConfiguration(CrawleeProxyConfiguration):
|
|
|
148
148
|
' "groups" or "country_code".'
|
|
149
149
|
)
|
|
150
150
|
|
|
151
|
-
if proxy_urls and any('apify.com' in
|
|
151
|
+
if proxy_urls and any('apify.com' in url for url in proxy_urls):
|
|
152
152
|
logger.warning(
|
|
153
153
|
'Some Apify proxy features may work incorrectly. Please consider setting up Apify properties '
|
|
154
154
|
'instead of `proxy_urls`.\n'
|
|
@@ -1,13 +1,10 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
from typing import TYPE_CHECKING
|
|
4
|
-
|
|
5
1
|
from typing_extensions import override
|
|
6
2
|
|
|
7
3
|
from apify_client import ApifyClientAsync
|
|
8
4
|
from crawlee._utils.crypto import crypto_random_object_id
|
|
9
|
-
from crawlee.
|
|
5
|
+
from crawlee.base_storage_client import BaseStorageClient
|
|
10
6
|
|
|
7
|
+
from apify._configuration import Configuration
|
|
11
8
|
from apify._utils import docs_group
|
|
12
9
|
from apify.apify_storage_client._dataset_client import DatasetClient
|
|
13
10
|
from apify.apify_storage_client._dataset_collection_client import DatasetCollectionClient
|
|
@@ -16,9 +13,6 @@ from apify.apify_storage_client._key_value_store_collection_client import KeyVal
|
|
|
16
13
|
from apify.apify_storage_client._request_queue_client import RequestQueueClient
|
|
17
14
|
from apify.apify_storage_client._request_queue_collection_client import RequestQueueCollectionClient
|
|
18
15
|
|
|
19
|
-
if TYPE_CHECKING:
|
|
20
|
-
from apify._configuration import Configuration
|
|
21
|
-
|
|
22
16
|
|
|
23
17
|
@docs_group('Classes')
|
|
24
18
|
class ApifyStorageClient(BaseStorageClient):
|
|
@@ -35,10 +29,6 @@ class ApifyStorageClient(BaseStorageClient):
|
|
|
35
29
|
)
|
|
36
30
|
self._configuration = configuration
|
|
37
31
|
|
|
38
|
-
@classmethod
|
|
39
|
-
def from_config(cls, config: Configuration) -> ApifyStorageClient:
|
|
40
|
-
return cls(configuration=config)
|
|
41
|
-
|
|
42
32
|
@override
|
|
43
33
|
def dataset(self, id: str) -> DatasetClient:
|
|
44
34
|
return DatasetClient(self._apify_client.dataset(id))
|
|
@@ -4,8 +4,7 @@ from typing import TYPE_CHECKING
|
|
|
4
4
|
|
|
5
5
|
from typing_extensions import override
|
|
6
6
|
|
|
7
|
-
from crawlee.
|
|
8
|
-
from crawlee.storage_clients.models import DatasetItemsListPage, DatasetMetadata
|
|
7
|
+
from crawlee.base_storage_client import BaseDatasetClient, DatasetItemsListPage, DatasetMetadata
|
|
9
8
|
|
|
10
9
|
if TYPE_CHECKING:
|
|
11
10
|
from collections.abc import AsyncIterator
|
|
@@ -4,8 +4,7 @@ from typing import TYPE_CHECKING
|
|
|
4
4
|
|
|
5
5
|
from typing_extensions import override
|
|
6
6
|
|
|
7
|
-
from crawlee.
|
|
8
|
-
from crawlee.storage_clients.models import DatasetListPage, DatasetMetadata
|
|
7
|
+
from crawlee.base_storage_client import BaseDatasetCollectionClient, DatasetListPage, DatasetMetadata
|
|
9
8
|
|
|
10
9
|
if TYPE_CHECKING:
|
|
11
10
|
from apify_client.clients import DatasetCollectionClientAsync
|
|
@@ -5,8 +5,12 @@ from typing import TYPE_CHECKING, Any
|
|
|
5
5
|
|
|
6
6
|
from typing_extensions import override
|
|
7
7
|
|
|
8
|
-
from crawlee.
|
|
9
|
-
|
|
8
|
+
from crawlee.base_storage_client import (
|
|
9
|
+
BaseKeyValueStoreClient,
|
|
10
|
+
KeyValueStoreListKeysPage,
|
|
11
|
+
KeyValueStoreMetadata,
|
|
12
|
+
KeyValueStoreRecord,
|
|
13
|
+
)
|
|
10
14
|
|
|
11
15
|
if TYPE_CHECKING:
|
|
12
16
|
from collections.abc import AsyncIterator
|
|
@@ -4,8 +4,7 @@ from typing import TYPE_CHECKING
|
|
|
4
4
|
|
|
5
5
|
from typing_extensions import override
|
|
6
6
|
|
|
7
|
-
from crawlee.
|
|
8
|
-
from crawlee.storage_clients.models import KeyValueStoreListPage, KeyValueStoreMetadata
|
|
7
|
+
from crawlee.base_storage_client import BaseKeyValueStoreCollectionClient, KeyValueStoreListPage, KeyValueStoreMetadata
|
|
9
8
|
|
|
10
9
|
if TYPE_CHECKING:
|
|
11
10
|
from apify_client.clients import KeyValueStoreCollectionClientAsync
|
|
@@ -5,8 +5,8 @@ from typing import TYPE_CHECKING
|
|
|
5
5
|
from typing_extensions import override
|
|
6
6
|
|
|
7
7
|
from crawlee import Request
|
|
8
|
-
from crawlee.
|
|
9
|
-
|
|
8
|
+
from crawlee.base_storage_client import (
|
|
9
|
+
BaseRequestQueueClient,
|
|
10
10
|
BatchRequestsOperationResponse,
|
|
11
11
|
ProcessedRequest,
|
|
12
12
|
ProlongRequestLockResponse,
|
|
@@ -80,6 +80,10 @@ class RequestQueueClient(BaseRequestQueueClient):
|
|
|
80
80
|
by_alias=True,
|
|
81
81
|
exclude={
|
|
82
82
|
'id',
|
|
83
|
+
'json_',
|
|
84
|
+
'order_no',
|
|
85
|
+
'query_params',
|
|
86
|
+
'data',
|
|
83
87
|
},
|
|
84
88
|
),
|
|
85
89
|
forefront=forefront,
|
|
@@ -103,6 +107,12 @@ class RequestQueueClient(BaseRequestQueueClient):
|
|
|
103
107
|
| await self._client.update_request(
|
|
104
108
|
request=request.model_dump(
|
|
105
109
|
by_alias=True,
|
|
110
|
+
exclude={
|
|
111
|
+
'json_',
|
|
112
|
+
'order_no',
|
|
113
|
+
'query_params',
|
|
114
|
+
'data',
|
|
115
|
+
},
|
|
106
116
|
),
|
|
107
117
|
forefront=forefront,
|
|
108
118
|
)
|
|
@@ -154,6 +164,10 @@ class RequestQueueClient(BaseRequestQueueClient):
|
|
|
154
164
|
by_alias=True,
|
|
155
165
|
exclude={
|
|
156
166
|
'id',
|
|
167
|
+
'json_',
|
|
168
|
+
'order_no',
|
|
169
|
+
'query_params',
|
|
170
|
+
'data',
|
|
157
171
|
},
|
|
158
172
|
)
|
|
159
173
|
for r in requests
|
|
@@ -169,6 +183,12 @@ class RequestQueueClient(BaseRequestQueueClient):
|
|
|
169
183
|
requests=[
|
|
170
184
|
r.model_dump(
|
|
171
185
|
by_alias=True,
|
|
186
|
+
exclude={
|
|
187
|
+
'json_',
|
|
188
|
+
'order_no',
|
|
189
|
+
'query_params',
|
|
190
|
+
'data',
|
|
191
|
+
},
|
|
172
192
|
)
|
|
173
193
|
for r in requests
|
|
174
194
|
],
|
|
@@ -4,8 +4,7 @@ from typing import TYPE_CHECKING
|
|
|
4
4
|
|
|
5
5
|
from typing_extensions import override
|
|
6
6
|
|
|
7
|
-
from crawlee.
|
|
8
|
-
from crawlee.storage_clients.models import RequestQueueListPage, RequestQueueMetadata
|
|
7
|
+
from crawlee.base_storage_client import BaseRequestQueueCollectionClient, RequestQueueListPage, RequestQueueMetadata
|
|
9
8
|
|
|
10
9
|
if TYPE_CHECKING:
|
|
11
10
|
from apify_client.clients import RequestQueueCollectionClientAsync
|
apify/log.py
CHANGED
|
@@ -1,10 +1,14 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
|
+
from typing import TYPE_CHECKING
|
|
4
5
|
|
|
5
6
|
from apify_shared.utils import ignore_docs
|
|
6
7
|
from crawlee._log_config import CrawleeLogFormatter, configure_logger, get_configured_log_level
|
|
7
8
|
|
|
9
|
+
if TYPE_CHECKING:
|
|
10
|
+
from apify import Configuration
|
|
11
|
+
|
|
8
12
|
# Name of the logger used throughout the library (resolves to 'apify')
|
|
9
13
|
logger_name = __name__.split('.')[0]
|
|
10
14
|
|
|
@@ -17,11 +21,11 @@ class ActorLogFormatter(CrawleeLogFormatter): # noqa: D101 (Inherited from pare
|
|
|
17
21
|
pass
|
|
18
22
|
|
|
19
23
|
|
|
20
|
-
def _configure_logging() -> None:
|
|
24
|
+
def _configure_logging(configuration: Configuration) -> None:
|
|
21
25
|
apify_client_logger = logging.getLogger('apify_client')
|
|
22
|
-
configure_logger(apify_client_logger, remove_old_handlers=True)
|
|
26
|
+
configure_logger(apify_client_logger, configuration, remove_old_handlers=True)
|
|
23
27
|
|
|
24
|
-
level = get_configured_log_level()
|
|
28
|
+
level = get_configured_log_level(configuration)
|
|
25
29
|
|
|
26
30
|
# Keep apify_client logger quiet unless debug logging is requested
|
|
27
31
|
if level > logging.DEBUG:
|
|
@@ -38,4 +42,4 @@ def _configure_logging() -> None:
|
|
|
38
42
|
|
|
39
43
|
# Use configured log level for apify logger
|
|
40
44
|
apify_logger = logging.getLogger('apify')
|
|
41
|
-
configure_logger(apify_logger, remove_old_handlers=True)
|
|
45
|
+
configure_logger(apify_logger, configuration, remove_old_handlers=True)
|
apify/storages/_request_list.py
CHANGED
|
@@ -11,7 +11,7 @@ from pydantic import BaseModel, Field, TypeAdapter
|
|
|
11
11
|
from crawlee import Request
|
|
12
12
|
from crawlee._types import HttpMethod
|
|
13
13
|
from crawlee.http_clients import BaseHttpClient, HttpxHttpClient
|
|
14
|
-
from crawlee.
|
|
14
|
+
from crawlee.storages import RequestList as CrawleeRequestList
|
|
15
15
|
|
|
16
16
|
from apify._utils import docs_group
|
|
17
17
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
2
|
Name: apify
|
|
3
|
-
Version: 2.2.
|
|
3
|
+
Version: 2.2.0b1
|
|
4
4
|
Summary: Apify SDK for Python
|
|
5
5
|
License: Apache-2.0
|
|
6
6
|
Keywords: apify,sdk,automation,chrome,crawlee,crawler,headless,scraper,scraping
|
|
@@ -21,11 +21,10 @@ Classifier: Topic :: Software Development :: Libraries
|
|
|
21
21
|
Provides-Extra: scrapy
|
|
22
22
|
Requires-Dist: apify-client (>=1.8.1)
|
|
23
23
|
Requires-Dist: apify-shared (>=1.2.1)
|
|
24
|
-
Requires-Dist: crawlee (>=0.
|
|
24
|
+
Requires-Dist: crawlee (>=0.4.0,<0.5.0)
|
|
25
25
|
Requires-Dist: cryptography (>=42.0.0)
|
|
26
|
-
Requires-Dist: httpx (>=0.27.0)
|
|
26
|
+
Requires-Dist: httpx (>=0.27.0,<0.28.0)
|
|
27
27
|
Requires-Dist: lazy-object-proxy (>=1.10.0)
|
|
28
|
-
Requires-Dist: more_itertools (>=10.2.0)
|
|
29
28
|
Requires-Dist: scrapy (>=2.11.0) ; extra == "scrapy"
|
|
30
29
|
Requires-Dist: typing-extensions (>=4.1.0)
|
|
31
30
|
Requires-Dist: websockets (>=10.0,<14.0.0)
|
|
@@ -1,22 +1,22 @@
|
|
|
1
1
|
apify/__init__.py,sha256=99ynaDWBLEcCjdLq7R0Exy_iACsXiXoQ8VUZKmbzTeM,550
|
|
2
|
-
apify/_actor.py,sha256=
|
|
3
|
-
apify/_configuration.py,sha256=
|
|
2
|
+
apify/_actor.py,sha256=AUviY4qrX4UoN7fSZtXXSHqEk4rrQwBymMLjkgb4Mzg,41887
|
|
3
|
+
apify/_configuration.py,sha256=_pPkesm1NEE6IxT1Mgxu3fGM89b1rtnLyeC3Hfpbh-Q,10516
|
|
4
4
|
apify/_consts.py,sha256=_Xq4hOfOA1iZ3n1P967YWdyncKivpbX6RTlp_qanUoE,330
|
|
5
5
|
apify/_crypto.py,sha256=e0_aM3l9_5Osk-jszYOOjrAKK60OggSHbiw5c30QnsU,5638
|
|
6
6
|
apify/_models.py,sha256=Btlz-23obKY5tJ75JnUwkVNC2lmU1IEBbdU3HvWaVhg,5748
|
|
7
7
|
apify/_platform_event_manager.py,sha256=44xyV0Lpzf4h4VZ0rkyYg_nhbQkEONNor8_Z9gIKO40,7899
|
|
8
|
-
apify/_proxy_configuration.py,sha256=
|
|
8
|
+
apify/_proxy_configuration.py,sha256=vdDiE5dfyNQYEXKXnj0jcgL3rG3-qanwNSybrVl1xT8,13167
|
|
9
9
|
apify/_utils.py,sha256=CCLkpAsZKp00ykm88Z_Fbck5PNT0j6mJYOuD0RxzZUs,1620
|
|
10
10
|
apify/apify_storage_client/__init__.py,sha256=-UbR68bFsDR6ln8OFs4t50eqcnY36hujO-SeOt-KmcA,114
|
|
11
|
-
apify/apify_storage_client/_apify_storage_client.py,sha256=
|
|
12
|
-
apify/apify_storage_client/_dataset_client.py,sha256=
|
|
13
|
-
apify/apify_storage_client/_dataset_collection_client.py,sha256=
|
|
14
|
-
apify/apify_storage_client/_key_value_store_client.py,sha256=
|
|
15
|
-
apify/apify_storage_client/_key_value_store_collection_client.py,sha256=
|
|
16
|
-
apify/apify_storage_client/_request_queue_client.py,sha256=
|
|
17
|
-
apify/apify_storage_client/_request_queue_collection_client.py,sha256=
|
|
11
|
+
apify/apify_storage_client/_apify_storage_client.py,sha256=NsZBleJNHLBXVyG__bVjdCGEI30cnmVZngCbQaVekfk,2397
|
|
12
|
+
apify/apify_storage_client/_dataset_client.py,sha256=FfXew6tBiZRkpovyPaQ__xhtZZ-rZvjijwBIIyRahH8,5536
|
|
13
|
+
apify/apify_storage_client/_dataset_collection_client.py,sha256=fkYvYGQCigHD2CDzpWk0swNAkfvAinAhMGpYqllle3E,1445
|
|
14
|
+
apify/apify_storage_client/_key_value_store_client.py,sha256=uyeQgb75sGFsqIS4sq4hEZ3QP81COLfS3tmTqHc0tso,3340
|
|
15
|
+
apify/apify_storage_client/_key_value_store_collection_client.py,sha256=vCtMTI-jx89Qp5WHILDNkCthwLuv0MAwm1J_5E4aypU,1519
|
|
16
|
+
apify/apify_storage_client/_request_queue_client.py,sha256=jAiFkaJ38_myHFGTw-Rk21wmpbN0UCR2w2SFoimFGFc,5826
|
|
17
|
+
apify/apify_storage_client/_request_queue_collection_client.py,sha256=NnO73UJ9ZrjV8xoudo30wfaM-SojRkG0guhxDyB-K1g,1527
|
|
18
18
|
apify/apify_storage_client/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
19
|
-
apify/log.py,sha256=
|
|
19
|
+
apify/log.py,sha256=zElFyEp2RJN0kiHEwJhcjSCAuHrba5zYiq4pK2xsL_o,1450
|
|
20
20
|
apify/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
21
21
|
apify/scrapy/__init__.py,sha256=HE5wCN7-DZKPydLCOvjNyLuL3CvN2fUFweXfrDfe1Ss,348
|
|
22
22
|
apify/scrapy/middlewares/__init__.py,sha256=tfW-d3WFWLeNEjL8fTmon6NwgD-OXx1Bw2fBdU-wPy4,114
|
|
@@ -30,9 +30,9 @@ apify/scrapy/requests.py,sha256=yZ9hIsz2YyqOoOwzN9F1h76wG4qwawrI6h_6xq0I7Iw,7599
|
|
|
30
30
|
apify/scrapy/scheduler.py,sha256=03kZxejWWb-TofJ-vpSZuQ28rT-qNjhhpC-QeO2OzoU,5977
|
|
31
31
|
apify/scrapy/utils.py,sha256=758DcHCSAgCTProY0QX74uJ1XrzVsQwvCmFanj2f_3Q,2928
|
|
32
32
|
apify/storages/__init__.py,sha256=FW-z6ubuPnHGM-Wp15T8mR5q6lnpDGrCW-IkgZd5L30,177
|
|
33
|
-
apify/storages/_request_list.py,sha256
|
|
33
|
+
apify/storages/_request_list.py,sha256=4nrvSdMUF-kiwGVIPEfIOygLKgjUpO37Jl8Om-jRbIU,5858
|
|
34
34
|
apify/storages/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
35
|
-
apify-2.2.
|
|
36
|
-
apify-2.2.
|
|
37
|
-
apify-2.2.
|
|
38
|
-
apify-2.2.
|
|
35
|
+
apify-2.2.0b1.dist-info/LICENSE,sha256=AsFjHssKjj4LGd2ZCqXn6FBzMqcWdjQre1byPPSypVw,11355
|
|
36
|
+
apify-2.2.0b1.dist-info/METADATA,sha256=ZWs1aVC-kxxxuYeI-kUZPYoQut8U-lUeSOruPJALYIw,8680
|
|
37
|
+
apify-2.2.0b1.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
|
38
|
+
apify-2.2.0b1.dist-info/RECORD,,
|
|
File without changes
|