apify 3.0.0rc1__py3-none-any.whl → 3.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of apify might be problematic.
- apify/_actor.py +150 -117
- apify/_charging.py +19 -0
- apify/_configuration.py +51 -11
- apify/events/__init__.py +2 -2
- apify/storage_clients/__init__.py +2 -0
- apify/storage_clients/_apify/_dataset_client.py +47 -23
- apify/storage_clients/_apify/_key_value_store_client.py +46 -22
- apify/storage_clients/_apify/_models.py +25 -1
- apify/storage_clients/_apify/_request_queue_client.py +188 -648
- apify/storage_clients/_apify/_request_queue_shared_client.py +527 -0
- apify/storage_clients/_apify/_request_queue_single_client.py +399 -0
- apify/storage_clients/_apify/_storage_client.py +55 -29
- apify/storage_clients/_apify/_utils.py +194 -0
- apify/storage_clients/_file_system/_key_value_store_client.py +70 -3
- apify/storage_clients/_file_system/_storage_client.py +7 -1
- apify/storage_clients/_smart_apify/__init__.py +1 -0
- apify/storage_clients/_smart_apify/_storage_client.py +117 -0
- {apify-3.0.0rc1.dist-info → apify-3.0.1.dist-info}/METADATA +20 -5
- {apify-3.0.0rc1.dist-info → apify-3.0.1.dist-info}/RECORD +21 -16
- {apify-3.0.0rc1.dist-info → apify-3.0.1.dist-info}/WHEEL +0 -0
- {apify-3.0.0rc1.dist-info → apify-3.0.1.dist-info}/licenses/LICENSE +0 -0

apify/storage_clients/_apify/_utils.py (new file)

@@ -0,0 +1,194 @@
+from __future__ import annotations
+
+import logging
+import re
+from asyncio import Lock
+from base64 import b64encode
+from hashlib import sha256
+from logging import getLogger
+from typing import TYPE_CHECKING, ClassVar
+
+from apify_client import ApifyClientAsync
+from crawlee._utils.crypto import compute_short_hash
+
+from apify._configuration import Configuration
+
+if TYPE_CHECKING:
+    from types import TracebackType
+
+    from apify_client.clients import KeyValueStoreClientAsync
+    from crawlee.storages import Dataset, KeyValueStore, RequestQueue
+
+
+logger = getLogger(__name__)
+
+
+class AliasResolver:
+    """Class for handling aliases.
+
+    The purpose of this is class is to ensure that alias storages are created with correct id. This is achieved by using
+    default kvs as a storage for global mapping of aliases to storage ids. Same mapping is also kept in memory to avoid
+    unnecessary calls to API and also have limited support of alias storages when not running on Apify platform. When on
+    Apify platform, the storages created with alias are accessible by the same alias even after migration or reboot.
+    """
+
+    _alias_map: ClassVar[dict[str, str]] = {}
+    """Map containing pre-existing alias storages and their ids. Global for all instances."""
+    _alias_init_lock: Lock | None = None
+    """Lock for creating alias storages. Only one alias storage can be created at the time. Global for all instances."""
+
+    _ALIAS_STORAGE_KEY_SEPARATOR = ','
+    _ALIAS_MAPPING_KEY = '__STORAGE_ALIASES_MAPPING'
+
+    def __init__(
+        self, storage_type: type[Dataset | KeyValueStore | RequestQueue], alias: str, configuration: Configuration
+    ) -> None:
+        self._storage_type = storage_type
+        self._alias = alias
+        self._additional_cache_key = hash_api_base_url_and_token(configuration)
+
+    async def __aenter__(self) -> AliasResolver:
+        """Context manager to prevent race condition in alias creation."""
+        lock = await self._get_alias_init_lock()
+        await lock.acquire()
+        return self
+
+    async def __aexit__(
+        self, exc_type: type[BaseException] | None, exc_value: BaseException | None, exc_traceback: TracebackType | None
+    ) -> None:
+        lock = await self._get_alias_init_lock()
+        lock.release()
+
+    @classmethod
+    async def _get_alias_init_lock(cls) -> Lock:
+        """Get lock for controlling the creation of the alias storages.
+
+        The lock is shared for all instances of the AliasResolver class.
+        It is created in async method to ensure that some event loop is already running.
+        """
+        if cls._alias_init_lock is None:
+            cls._alias_init_lock = Lock()
+        return cls._alias_init_lock
+
+    @classmethod
+    async def _get_alias_map(cls) -> dict[str, str]:
+        """Get the aliases and storage ids mapping from the default kvs.
+
+        Mapping is loaded from kvs only once and is shared for all instances of the AliasResolver class.
+
+        Returns:
+            Map of aliases and storage ids.
+        """
+        if not cls._alias_map and Configuration.get_global_configuration().is_at_home:
+            default_kvs_client = await _get_default_kvs_client()
+
+            record = await default_kvs_client.get_record(cls._ALIAS_MAPPING_KEY)
+
+            # get_record can return {key: ..., value: ..., content_type: ...}
+            if isinstance(record, dict):
+                if 'value' in record and isinstance(record['value'], dict):
+                    cls._alias_map = record['value']
+                else:
+                    cls._alias_map = record
+            else:
+                cls._alias_map = dict[str, str]()
+
+        return cls._alias_map
+
+    async def resolve_id(self) -> str | None:
+        """Get id of the aliased storage.
+
+        Either locate the id in the in-memory mapping or create the new storage.
+
+        Returns:
+            Storage id if it exists, None otherwise.
+        """
+        return (await self._get_alias_map()).get(self._storage_key, None)
+
+    async def store_mapping(self, storage_id: str) -> None:
+        """Add alias and related storage id to the mapping in default kvs and local in-memory mapping."""
+        # Update in-memory mapping
+        (await self._get_alias_map())[self._storage_key] = storage_id
+        if not Configuration.get_global_configuration().is_at_home:
+            logging.getLogger(__name__).debug(
+                'AliasResolver storage limited retention is only supported on Apify platform. Storage is not exported.'
+            )
+            return
+
+        default_kvs_client = await _get_default_kvs_client()
+        await default_kvs_client.get()
+
+        try:
+            record = await default_kvs_client.get_record(self._ALIAS_MAPPING_KEY)
+
+            # get_record can return {key: ..., value: ..., content_type: ...}
+            if isinstance(record, dict) and 'value' in record:
+                record = record['value']
+
+            # Update or create the record with the new alias mapping
+            if isinstance(record, dict):
+                record[self._storage_key] = storage_id
+            else:
+                record = {self._storage_key: storage_id}
+
+            # Store the mapping back in the KVS.
+            await default_kvs_client.set_record(self._ALIAS_MAPPING_KEY, record)
+        except Exception as exc:
+            logger.warning(f'Error storing alias mapping for {self._alias}: {exc}')
+
+    @property
+    def _storage_key(self) -> str:
+        """Get a unique storage key used for storing the alias in the mapping."""
+        return self._ALIAS_STORAGE_KEY_SEPARATOR.join(
+            [
+                self._storage_type.__name__,
+                self._alias,
+                self._additional_cache_key,
+            ]
+        )
+
+
+async def _get_default_kvs_client() -> KeyValueStoreClientAsync:
+    """Get a client for the default key-value store."""
+    configuration = Configuration.get_global_configuration()
+
+    apify_client_async = ApifyClientAsync(
+        token=configuration.token,
+        api_url=configuration.api_base_url,
+        max_retries=8,
+        min_delay_between_retries_millis=500,
+        timeout_secs=360,
+    )
+    if not configuration.default_key_value_store_id:
+        raise ValueError("'Configuration.default_key_value_store_id' must be set.")
+    return apify_client_async.key_value_store(key_value_store_id=configuration.default_key_value_store_id)
+
+
+def hash_api_base_url_and_token(configuration: Configuration) -> str:
+    """Hash configuration.api_public_base_url and configuration.token in deterministic way."""
+    if configuration.api_public_base_url is None or configuration.token is None:
+        raise ValueError("'Configuration.api_public_base_url' and 'Configuration.token' must be set.")
+    return compute_short_hash(f'{configuration.api_public_base_url}{configuration.token}'.encode())
+
+
+def unique_key_to_request_id(unique_key: str, *, request_id_length: int = 15) -> str:
+    """Generate a deterministic request ID based on a unique key.
+
+    Args:
+        unique_key: The unique key to convert into a request ID.
+        request_id_length: The length of the request ID.
+
+    Returns:
+        A URL-safe, truncated request ID based on the unique key.
+    """
+    # Encode the unique key and compute its SHA-256 hash
+    hashed_key = sha256(unique_key.encode('utf-8')).digest()
+
+    # Encode the hash in base64 and decode it to get a string
+    base64_encoded = b64encode(hashed_key).decode('utf-8')
+
+    # Remove characters that are not URL-safe ('+', '/', or '=')
+    url_safe_key = re.sub(r'(\+|\/|=)', '', base64_encoded)
+
+    # Truncate the key to the desired length
+    return url_safe_key[:request_id_length]
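The request id scheme above is deterministic and can be reproduced with the standard library alone. The following is a minimal sketch of the same sha256, base64, strip, truncate recipe that unique_key_to_request_id implements; the helper name request_id_for and the example key are illustrative and not part of the package.

import re
from base64 import b64encode
from hashlib import sha256

def request_id_for(unique_key: str, length: int = 15) -> str:
    # SHA-256 the key, base64-encode it, drop '+', '/' and '=', then truncate.
    digest = sha256(unique_key.encode('utf-8')).digest()
    return re.sub(r'[+/=]', '', b64encode(digest).decode('utf-8'))[:length]

print(request_id_for('https://example.com'))  # same id on every run for the same key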
apify/storage_clients/_file_system/_key_value_store_client.py

@@ -1,12 +1,19 @@
import asyncio
+import json
+import logging

-from
+from more_itertools import flatten
+from typing_extensions import Self, override

from crawlee._consts import METADATA_FILENAME
+from crawlee.configuration import Configuration as CrawleeConfiguration
from crawlee.storage_clients._file_system import FileSystemKeyValueStoreClient
+from crawlee.storage_clients.models import KeyValueStoreRecord

from apify._configuration import Configuration

+logger = logging.getLogger(__name__)
+

class ApifyFileSystemKeyValueStoreClient(FileSystemKeyValueStoreClient):
    """Apify-specific implementation of the `FileSystemKeyValueStoreClient`.
@@ -15,6 +22,22 @@ class ApifyFileSystemKeyValueStoreClient(FileSystemKeyValueStoreClient):
    directory, except for the metadata file and the `INPUT.json` file.
    """

+    @override
+    @classmethod
+    async def open(
+        cls,
+        *,
+        id: str | None,
+        name: str | None,
+        alias: str | None,
+        configuration: CrawleeConfiguration,
+    ) -> Self:
+        client = await super().open(id=id, name=name, alias=alias, configuration=configuration)
+
+        await client._sanitize_input_json_files()  # noqa: SLF001 - it's okay, this is a factory method
+
+        return client
+
    @override
    async def purge(self) -> None:
        """Purges the key-value store by deleting all its contents.
@@ -22,10 +45,16 @@ class ApifyFileSystemKeyValueStoreClient(FileSystemKeyValueStoreClient):
        It deletes all files in the key-value store directory, except for the metadata file and
        the `INPUT.json` file. It also updates the metadata to reflect that the store has been purged.
        """
-
+        configuration = Configuration.get_global_configuration()
+
        async with self._lock:
+            files_to_keep = set(
+                flatten([key, f'{key}.{METADATA_FILENAME}'] for key in configuration.input_key_candidates)
+            )
+            files_to_keep.add(METADATA_FILENAME)
+
            for file_path in self.path_to_kvs.glob('*'):
-                if file_path.name in
+                if file_path.name in files_to_keep:
                    continue
                if file_path.is_file():
                    await asyncio.to_thread(file_path.unlink, missing_ok=True)
@@ -34,3 +63,41 @@ class ApifyFileSystemKeyValueStoreClient(FileSystemKeyValueStoreClient):
            update_accessed_at=True,
            update_modified_at=True,
        )
+
+    async def _sanitize_input_json_files(self) -> None:
+        """Handle missing metadata for input files."""
+        configuration = Configuration.get_global_configuration()
+        alternative_keys = configuration.input_key_candidates - {configuration.canonical_input_key}
+
+        if (self.path_to_kvs / configuration.canonical_input_key).exists():
+            # Refresh metadata to prevent inconsistencies
+            input_data = await asyncio.to_thread(
+                lambda: json.loads((self.path_to_kvs / configuration.canonical_input_key).read_text())
+            )
+            await self.set_value(key=configuration.canonical_input_key, value=input_data)
+
+            for alternative_key in alternative_keys:
+                if (alternative_input_file := self.path_to_kvs / alternative_key).exists():
+                    logger.warning(f'Redundant input file found: {alternative_input_file}')
+        else:
+            for alternative_key in alternative_keys:
+                alternative_input_file = self.path_to_kvs / alternative_key
+
+                # Only process files that actually exist
+                if alternative_input_file.exists():
+                    # Refresh metadata to prevent inconsistencies
+                    with alternative_input_file.open() as f:
+                        input_data = await asyncio.to_thread(lambda: json.load(f))
+                    await self.set_value(key=alternative_key, value=input_data)
+
+    @override
+    async def get_value(self, *, key: str) -> KeyValueStoreRecord | None:
+        configuration = Configuration.get_global_configuration()
+
+        if key in configuration.input_key_candidates:
+            for candidate in configuration.input_key_candidates:
+                value = await super().get_value(key=candidate)
+                if value is not None:
+                    return value
+
+        return await super().get_value(key=key)
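In practice these changes mostly affect how Actor input is read on local runs: open normalizes whichever input file is present, and get_value falls back across all accepted input keys. A minimal usage sketch, assuming a local Actor project whose default key-value store contains an input file under one of the accepted keys; Actor.get_input() is the public call that reads the input record from the default key-value store and so, presumably, ends up in the get_value fallback shown above.

import asyncio

from apify import Actor

async def main() -> None:
    async with Actor:
        # Locally this resolves the input through the file-system key-value store;
        # on the Apify platform it reads the Actor run's input record instead.
        actor_input = await Actor.get_input() or {}
        Actor.log.info(f'Actor input: {actor_input}')

asyncio.run(main())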
apify/storage_clients/_file_system/_storage_client.py

@@ -27,9 +27,15 @@ class ApifyFileSystemStorageClient(FileSystemStorageClient):
        *,
        id: str | None = None,
        name: str | None = None,
+        alias: str | None = None,
        configuration: Configuration | None = None,
    ) -> FileSystemKeyValueStoreClient:
        configuration = configuration or Configuration.get_global_configuration()
-        client = await ApifyFileSystemKeyValueStoreClient.open(
+        client = await ApifyFileSystemKeyValueStoreClient.open(
+            id=id,
+            name=name,
+            alias=alias,
+            configuration=configuration,
+        )
        await self._purge_if_needed(client, configuration)
        return client
apify/storage_clients/_smart_apify/__init__.py (new file)

@@ -0,0 +1 @@
+from ._storage_client import SmartApifyStorageClient
apify/storage_clients/_smart_apify/_storage_client.py (new file)

@@ -0,0 +1,117 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from typing_extensions import override
+
+from crawlee.storage_clients._base import DatasetClient, KeyValueStoreClient, RequestQueueClient, StorageClient
+
+from apify._configuration import Configuration as ApifyConfiguration
+from apify._utils import docs_group
+from apify.storage_clients import ApifyStorageClient
+from apify.storage_clients._file_system import ApifyFileSystemStorageClient
+
+if TYPE_CHECKING:
+    from collections.abc import Hashable
+
+    from crawlee.configuration import Configuration as CrawleeConfiguration
+
+
+@docs_group('Storage clients')
+class SmartApifyStorageClient(StorageClient):
+    """SmartApifyStorageClient that delegates to cloud_storage_client or local_storage_client.
+
+    When running on Apify platform use cloud_storage_client, else use local_storage_client. This storage client is
+    designed to work specifically in Actor context.
+    """
+
+    def __init__(
+        self,
+        *,
+        cloud_storage_client: ApifyStorageClient | None = None,
+        local_storage_client: StorageClient | None = None,
+    ) -> None:
+        """Initialize the Apify storage client.
+
+        Args:
+            cloud_storage_client: Client used to communicate with the Apify platform storage. Either through
+                `force_cloud` argument when opening storages or automatically when running on the Apify platform.
+            local_storage_client: Client used to communicate with the storage when not running on the Apify
+                platform and not using `force_cloud` argument when opening storages.
+        """
+        self._cloud_storage_client = cloud_storage_client or ApifyStorageClient(request_queue_access='single')
+        self._local_storage_client = local_storage_client or ApifyFileSystemStorageClient()
+
+    def __str__(self) -> str:
+        return (
+            f'{self.__class__.__name__}(cloud_storage_client={self._cloud_storage_client.__class__.__name__},'
+            f' local_storage_client={self._local_storage_client.__class__.__name__})'
+        )
+
+    def get_suitable_storage_client(self, *, force_cloud: bool = False) -> StorageClient:
+        """Get a suitable storage client based on the global configuration and the value of the force_cloud flag.
+
+        Args:
+            force_cloud: If True, return `cloud_storage_client`.
+        """
+        if ApifyConfiguration.get_global_configuration().is_at_home:
+            return self._cloud_storage_client
+
+        configuration = ApifyConfiguration.get_global_configuration()
+        if force_cloud:
+            if configuration.token is None:
+                raise RuntimeError(
+                    'In order to use the Apify cloud storage from your computer, '
+                    'you need to provide an Apify token using the APIFY_TOKEN environment variable.'
+                )
+            return self._cloud_storage_client
+
+        return self._local_storage_client
+
+    @override
+    def get_storage_client_cache_key(self, configuration: CrawleeConfiguration) -> Hashable:
+        if ApifyConfiguration.get_global_configuration().is_at_home:
+            if isinstance(configuration, ApifyConfiguration):
+                return self._cloud_storage_client.get_storage_client_cache_key(configuration)
+            raise TypeError('Expecting ApifyConfiguration')
+
+        return self._local_storage_client.get_storage_client_cache_key(configuration)
+
+    @override
+    async def create_dataset_client(
+        self,
+        *,
+        id: str | None = None,
+        name: str | None = None,
+        alias: str | None = None,
+        configuration: CrawleeConfiguration | None = None,
+    ) -> DatasetClient:
+        return await self.get_suitable_storage_client().create_dataset_client(
+            id=id, name=id, alias=alias, configuration=configuration
+        )
+
+    @override
+    async def create_kvs_client(
+        self,
+        *,
+        id: str | None = None,
+        name: str | None = None,
+        alias: str | None = None,
+        configuration: CrawleeConfiguration | None = None,
+    ) -> KeyValueStoreClient:
+        return await self.get_suitable_storage_client().create_kvs_client(
+            id=id, name=id, alias=alias, configuration=configuration
+        )
+
+    @override
+    async def create_rq_client(
+        self,
+        *,
+        id: str | None = None,
+        name: str | None = None,
+        alias: str | None = None,
+        configuration: CrawleeConfiguration | None = None,
+    ) -> RequestQueueClient:
+        return await self.get_suitable_storage_client().create_rq_client(
+            id=id, name=id, alias=alias, configuration=configuration
+        )
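A minimal sketch of the delegation behaviour, assuming a local run (is_at_home is False) and that constructing either client does not contact the API; the private import path mirrors the __init__.py addition above, and the class is presumably also re-exported from apify.storage_clients (its __init__.py gains two lines in this release).

from apify.storage_clients import ApifyStorageClient
from apify.storage_clients._file_system import ApifyFileSystemStorageClient
from apify.storage_clients._smart_apify import SmartApifyStorageClient

storage_client = SmartApifyStorageClient(
    cloud_storage_client=ApifyStorageClient(request_queue_access='single'),
    local_storage_client=ApifyFileSystemStorageClient(),
)

# Locally and without force_cloud the file-system client is selected; on the Apify
# platform, or with force_cloud=True and an APIFY_TOKEN set, the cloud client is used.
print(storage_client.get_suitable_storage_client())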
{apify-3.0.0rc1.dist-info → apify-3.0.1.dist-info}/METADATA

@@ -1,6 +1,6 @@
Metadata-Version: 2.4
Name: apify
-Version: 3.0.
+Version: 3.0.1
Summary: Apify SDK for Python
Project-URL: Apify Homepage, https://apify.com
Project-URL: Changelog, https://docs.apify.com/sdk/python/docs/changelog
@@ -228,10 +228,10 @@ Requires-Python: >=3.10
Requires-Dist: apify-client<3.0.0,>=2.0.0
Requires-Dist: apify-shared<3.0.0,>=2.0.0
Requires-Dist: cachetools>=5.5.0
-Requires-Dist: crawlee
+Requires-Dist: crawlee<2.0.0,>=1.0.2
Requires-Dist: cryptography>=42.0.0
-Requires-Dist: impit>=0.
-Requires-Dist: lazy-object-proxy
+Requires-Dist: impit>=0.6.1
+Requires-Dist: lazy-object-proxy>=1.11.0
Requires-Dist: more-itertools>=10.2.0
Requires-Dist: typing-extensions>=4.1.0
Requires-Dist: websockets>=14.0
@@ -240,7 +240,22 @@ Provides-Extra: scrapy
Requires-Dist: scrapy>=2.11.0; extra == 'scrapy'
Description-Content-Type: text/markdown

-
+<h1 align=center>Apify SDK for Python</h1>
+
+<p align=center>
+<a href="https://badge.fury.io/py/apify" rel="nofollow">
+<img src="https://badge.fury.io/py/apify.svg" alt="PyPI version" style="max-width: 100%;">
+</a>
+<a href="https://pypi.org/project/apify/" rel="nofollow">
+<img src="https://img.shields.io/pypi/dm/apify" alt="PyPI - Downloads" style="max-width: 100%;">
+</a>
+<a href="https://pypi.org/project/apify/" rel="nofollow">
+<img src="https://img.shields.io/pypi/pyversions/apify" alt="PyPI - Python Version" style="max-width: 100%;">
+</a>
+<a href="https://discord.gg/jyEM2PRvMU" rel="nofollow">
+<img src="https://img.shields.io/discord/801163717915574323?label=discord" alt="Chat on discord" style="max-width: 100%;">
+</a>
+</p>

The Apify SDK for Python is the official library to create [Apify Actors](https://docs.apify.com/platform/actors)
in Python. It provides useful features like Actor lifecycle management, local storage emulation, and Actor
{apify-3.0.0rc1.dist-info → apify-3.0.1.dist-info}/RECORD

@@ -1,7 +1,7 @@
apify/__init__.py,sha256=HpgKg2FZWJuSPfDygzJ62psylhw4NN4tKFnoYUIhcd4,838
-apify/_actor.py,sha256=
-apify/_charging.py,sha256=
-apify/_configuration.py,sha256=
+apify/_actor.py,sha256=DYHoyBAu6hDLs0BcTZL-IQveLK8gPTWvb6AgDnJc3EA,54755
+apify/_charging.py,sha256=KjZ2DnEMS0Tt8ibizmmt0RwBq8FOAsD1z-hKFgdazcY,13143
+apify/_configuration.py,sha256=7ZHhgRp98kr35zx4k4EB2aImq7Dq1FJjPg7r5bucv_M,14984
apify/_consts.py,sha256=CjhyEJ4Mi0lcIrzfqz8dN7nPJWGjCeBrrXQy1PZ6zRI,440
apify/_crypto.py,sha256=tqUs13QkemDtGzvU41pIA2HUEawpDlgzqbwKjm4I8kM,6852
apify/_models.py,sha256=EzU-inWeJ7T5HNVYEwnYb79W-q4OAPhtrYctfRYzpTE,7848
@@ -9,7 +9,7 @@ apify/_proxy_configuration.py,sha256=K9V4vG9-bAx7_a4l0zHhfbzvzopJeHek-qUJ05aQ6wI
apify/_utils.py,sha256=og_zzRXELQmirklJWp1kSV7pwQPFfAE81UO3IZ4xBNs,2414
apify/log.py,sha256=Ry251sK4qPRGiFGe3DKcOZazg5OzC6RXS546Zzk8H2M,1003
apify/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-apify/events/__init__.py,sha256=
+apify/events/__init__.py,sha256=fLNqlsM6AboUQrAxmb-GD1Pw6oDivN_eSAviGJtqc7c,198
apify/events/_apify_event_manager.py,sha256=yArFrKa4wWDZo32iwaA3F_w36VSJf1Yaj_L1opo8ncU,5917
apify/events/_types.py,sha256=F0BHgACqnRfmdQ9GUcpnZvPxzw2bdRr8BqbGSA4cHeQ,3050
apify/events/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -32,21 +32,26 @@ apify/scrapy/middlewares/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3h
apify/scrapy/pipelines/__init__.py,sha256=GWPeLN_Zwj8vRBWtXW6DaxdB7mvyQ7Jw5Tz1ccgWlZI,119
apify/scrapy/pipelines/actor_dataset_push.py,sha256=XUUyznQTD-E3wYUUFt2WAOnWhbnRrY0WuedlfYfYhDI,846
apify/scrapy/pipelines/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-apify/storage_clients/__init__.py,sha256=
+apify/storage_clients/__init__.py,sha256=JheTvNpVD_luQXC1KTEgtr6yVnuMEC9ajBNLCX3HuSo,358
apify/storage_clients/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
apify/storage_clients/_apify/__init__.py,sha256=mtbVDsxqWL3kx30elnh0kAn2kZ4s3BBsWa15Y5e7RMU,347
-apify/storage_clients/_apify/_dataset_client.py,sha256=
-apify/storage_clients/_apify/_key_value_store_client.py,sha256=
-apify/storage_clients/_apify/_models.py,sha256=
-apify/storage_clients/_apify/_request_queue_client.py,sha256=
-apify/storage_clients/_apify/
+apify/storage_clients/_apify/_dataset_client.py,sha256=Bb3UwOaFkyuEY7tuBf8K46R4ZP_b1EaAkDOXOqwSoW8,12498
+apify/storage_clients/_apify/_key_value_store_client.py,sha256=42dARbLX2oeOW7uYYKkDyQbEriMuh55Mxh0SqvkOEGg,10529
+apify/storage_clients/_apify/_models.py,sha256=GEaN7Got1zIg42QPH36obHRWRDVNtzOkRuOWYRf9bFU,4572
+apify/storage_clients/_apify/_request_queue_client.py,sha256=QXCLdTBeNW8RKWnxQOE71KOpZ_lqvqisa89eeiWwZ38,14200
+apify/storage_clients/_apify/_request_queue_shared_client.py,sha256=CbvwcXRvfuBoy3wrQEdLX9_vKELPH_WhHQARP14audM,20709
+apify/storage_clients/_apify/_request_queue_single_client.py,sha256=6CRSyWZPbKQJy3i2JBrojVTnhTYIB3gE0CTymYjpkZA,16958
+apify/storage_clients/_apify/_storage_client.py,sha256=hFl_PuX1UgOydBD6pieZ0u2NWbDmZV-i0qygKdsuHt4,4873
+apify/storage_clients/_apify/_utils.py,sha256=ywXoSM69amRokUZcshbAvQLIcSZq4L-bpYIGyeFxCGQ,7696
apify/storage_clients/_apify/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
apify/storage_clients/_file_system/__init__.py,sha256=rDbXatXV9wHKPhKTrXDzWnexhTm7sIJQWucMi-P-SD4,130
-apify/storage_clients/_file_system/_key_value_store_client.py,sha256=
-apify/storage_clients/_file_system/_storage_client.py,sha256=
+apify/storage_clients/_file_system/_key_value_store_client.py,sha256=gxM3ap67PnY80Rd7P3onPAf2pksYpU0LoAlJdayEMdU,4179
+apify/storage_clients/_file_system/_storage_client.py,sha256=rcwpKYlrWzvlSA2xoxftg-EZAi_iGZ3vOCbu0C5lKDE,1396
+apify/storage_clients/_smart_apify/__init__.py,sha256=614B2AaWY-dx6RQ6mod7VVR8gFh75-_jnq5BeDD7hSc,53
+apify/storage_clients/_smart_apify/_storage_client.py,sha256=GCPmVe_xWAFcO2Cuej4su4i97_d33Q9Ih_Sc5xW2Wa4,4674
apify/storages/__init__.py,sha256=-9tEYJVabVs_eRVhUehxN58GH0UG8OfuGjGwuDieP2M,122
apify/storages/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-apify-3.0.
-apify-3.0.
-apify-3.0.
-apify-3.0.
+apify-3.0.1.dist-info/METADATA,sha256=EmYGXdZ84rJZkj-0_UWKQXbhWnNpu0sETwnPxQ-Vbc0,22580
+apify-3.0.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+apify-3.0.1.dist-info/licenses/LICENSE,sha256=AsFjHssKjj4LGd2ZCqXn6FBzMqcWdjQre1byPPSypVw,11355
+apify-3.0.1.dist-info/RECORD,,
{apify-3.0.0rc1.dist-info → apify-3.0.1.dist-info}/WHEEL
File without changes

{apify-3.0.0rc1.dist-info → apify-3.0.1.dist-info}/licenses/LICENSE
File without changes