apify 2.7.2__py3-none-any.whl → 3.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of apify might be problematic. Click here for more details.
- apify/_actor.py +194 -126
- apify/_charging.py +34 -9
- apify/_configuration.py +79 -6
- apify/_crypto.py +0 -6
- apify/_models.py +7 -7
- apify/_proxy_configuration.py +10 -10
- apify/_utils.py +25 -2
- apify/events/__init__.py +5 -0
- apify/events/_apify_event_manager.py +140 -0
- apify/events/_types.py +102 -0
- apify/log.py +0 -9
- apify/request_loaders/__init__.py +18 -0
- apify/{storages/_request_list.py → request_loaders/_apify_request_list.py} +25 -18
- apify/request_loaders/py.typed +0 -0
- apify/scrapy/_logging_config.py +1 -4
- apify/scrapy/extensions/_httpcache.py +9 -5
- apify/scrapy/requests.py +3 -3
- apify/scrapy/scheduler.py +8 -5
- apify/storage_clients/__init__.py +12 -0
- apify/storage_clients/_apify/__init__.py +11 -0
- apify/storage_clients/_apify/_dataset_client.py +328 -0
- apify/storage_clients/_apify/_key_value_store_client.py +265 -0
- apify/storage_clients/_apify/_models.py +131 -0
- apify/storage_clients/_apify/_request_queue_client.py +327 -0
- apify/storage_clients/_apify/_request_queue_shared_client.py +527 -0
- apify/storage_clients/_apify/_request_queue_single_client.py +399 -0
- apify/storage_clients/_apify/_storage_client.py +106 -0
- apify/storage_clients/_apify/_utils.py +194 -0
- apify/storage_clients/_apify/py.typed +0 -0
- apify/storage_clients/_file_system/__init__.py +2 -0
- apify/storage_clients/_file_system/_key_value_store_client.py +57 -0
- apify/storage_clients/_file_system/_storage_client.py +41 -0
- apify/storage_clients/_smart_apify/__init__.py +1 -0
- apify/storage_clients/_smart_apify/_storage_client.py +117 -0
- apify/storage_clients/py.typed +0 -0
- apify/storages/__init__.py +1 -3
- {apify-2.7.2.dist-info → apify-3.0.0.dist-info}/METADATA +25 -9
- apify-3.0.0.dist-info/RECORD +57 -0
- apify/_platform_event_manager.py +0 -231
- apify/apify_storage_client/__init__.py +0 -3
- apify/apify_storage_client/_apify_storage_client.py +0 -72
- apify/apify_storage_client/_dataset_client.py +0 -190
- apify/apify_storage_client/_dataset_collection_client.py +0 -51
- apify/apify_storage_client/_key_value_store_client.py +0 -109
- apify/apify_storage_client/_key_value_store_collection_client.py +0 -51
- apify/apify_storage_client/_request_queue_client.py +0 -176
- apify/apify_storage_client/_request_queue_collection_client.py +0 -51
- apify-2.7.2.dist-info/RECORD +0 -44
- /apify/{apify_storage_client → events}/py.typed +0 -0
- {apify-2.7.2.dist-info → apify-3.0.0.dist-info}/WHEEL +0 -0
- {apify-2.7.2.dist-info → apify-3.0.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import re
|
|
5
|
+
from asyncio import Lock
|
|
6
|
+
from base64 import b64encode
|
|
7
|
+
from hashlib import sha256
|
|
8
|
+
from logging import getLogger
|
|
9
|
+
from typing import TYPE_CHECKING, ClassVar
|
|
10
|
+
|
|
11
|
+
from apify_client import ApifyClientAsync
|
|
12
|
+
from crawlee._utils.crypto import compute_short_hash
|
|
13
|
+
|
|
14
|
+
from apify._configuration import Configuration
|
|
15
|
+
|
|
16
|
+
if TYPE_CHECKING:
|
|
17
|
+
from types import TracebackType
|
|
18
|
+
|
|
19
|
+
from apify_client.clients import KeyValueStoreClientAsync
|
|
20
|
+
from crawlee.storages import Dataset, KeyValueStore, RequestQueue
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
logger = getLogger(__name__)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class AliasResolver:
    """Resolve storage aliases to storage ids.

    The purpose of this class is to ensure that alias storages are created with the correct id. This is achieved by
    using the default kvs as a storage for the global mapping of aliases to storage ids. The same mapping is also kept
    in memory to avoid unnecessary calls to the API and to have limited support of alias storages when not running on
    the Apify platform. When on the Apify platform, the storages created with an alias are accessible by the same
    alias even after migration or reboot.

    Instances are async context managers: entering acquires a lock shared by all instances, exiting releases it.
    """

    _alias_map: ClassVar[dict[str, str]] = {}
    """Map containing pre-existing alias storages and their ids. Global for all instances."""
    _alias_init_lock: ClassVar[Lock | None] = None
    """Lock for creating alias storages. Only one alias storage can be created at the time. Global for all instances."""

    _ALIAS_STORAGE_KEY_SEPARATOR = ','
    _ALIAS_MAPPING_KEY = '__STORAGE_ALIASES_MAPPING'

    def __init__(
        self, storage_type: type[Dataset | KeyValueStore | RequestQueue], alias: str, configuration: Configuration
    ) -> None:
        """Remember the storage type and alias and derive the configuration-specific part of the storage key.

        Args:
            storage_type: Storage class whose `__name__` becomes the first part of the storage key.
            alias: Alias of the storage.
            configuration: Configuration passed to `hash_api_base_url_and_token` to build the additional cache key.
        """
        self._storage_type = storage_type
        self._alias = alias
        self._additional_cache_key = hash_api_base_url_and_token(configuration)

    async def __aenter__(self) -> AliasResolver:
        """Context manager to prevent race condition in alias creation."""
        lock = await self._get_alias_init_lock()
        await lock.acquire()
        return self

    async def __aexit__(
        self, exc_type: type[BaseException] | None, exc_value: BaseException | None, exc_traceback: TracebackType | None
    ) -> None:
        """Release the shared lock acquired in `__aenter__`; exceptions are not suppressed."""
        lock = await self._get_alias_init_lock()
        lock.release()

    @classmethod
    async def _get_alias_init_lock(cls) -> Lock:
        """Get lock for controlling the creation of the alias storages.

        The lock is shared for all instances of the AliasResolver class.
        It is created in async method to ensure that some event loop is already running.
        """
        if cls._alias_init_lock is None:
            cls._alias_init_lock = Lock()
        return cls._alias_init_lock

    @staticmethod
    def _mapping_from_record(record: object) -> dict[str, str]:
        """Extract the alias mapping from whatever `get_record` returned.

        A dict containing a `value` key is treated as the `{key: ..., value: ..., content_type: ...}` envelope and
        unwrapped. Anything that is not a dict after unwrapping (for example `None`) yields a new empty mapping, so
        the envelope itself is never mistaken for the mapping.
        """
        if isinstance(record, dict) and 'value' in record:
            record = record['value']
        return record if isinstance(record, dict) else {}

    @classmethod
    async def _get_alias_map(cls) -> dict[str, str]:
        """Get the aliases and storage ids mapping from the default kvs.

        The mapping is shared for all instances of the AliasResolver class. It is fetched from the default kvs only
        while the in-memory mapping is still empty and the global configuration reports `is_at_home`.

        Returns:
            Map of aliases and storage ids.
        """
        if not cls._alias_map and Configuration.get_global_configuration().is_at_home:
            default_kvs_client = await _get_default_kvs_client()
            record = await default_kvs_client.get_record(cls._ALIAS_MAPPING_KEY)
            cls._alias_map = cls._mapping_from_record(record)

        return cls._alias_map

    async def resolve_id(self) -> str | None:
        """Get id of the aliased storage.

        The id is looked up in the shared alias mapping; nothing is created here.

        Returns:
            Storage id if it exists, None otherwise.
        """
        return (await self._get_alias_map()).get(self._storage_key)

    async def store_mapping(self, storage_id: str) -> None:
        """Add alias and related storage id to the mapping in default kvs and local in-memory mapping.

        When the global configuration does not report `is_at_home`, only the in-memory mapping is updated. Errors
        raised while reading or writing the kvs record are logged as a warning and not re-raised.
        """
        # Update in-memory mapping
        (await self._get_alias_map())[self._storage_key] = storage_id
        if not Configuration.get_global_configuration().is_at_home:
            logger.debug(
                'AliasResolver storage limited retention is only supported on Apify platform. Storage is not exported.'
            )
            return

        default_kvs_client = await _get_default_kvs_client()
        # NOTE(review): the result is unused; presumably this verifies the store is reachable - confirm before removing.
        await default_kvs_client.get()

        try:
            record = await default_kvs_client.get_record(self._ALIAS_MAPPING_KEY)

            # Update or create the record with the new alias mapping
            mapping = self._mapping_from_record(record)
            mapping[self._storage_key] = storage_id

            # Store the mapping back in the KVS.
            await default_kvs_client.set_record(self._ALIAS_MAPPING_KEY, mapping)
        except Exception as exc:
            # Best effort: the in-memory mapping is already updated, so a failed export is only reported.
            logger.warning(f'Error storing alias mapping for {self._alias}: {exc}')

    @property
    def _storage_key(self) -> str:
        """Get a unique storage key used for storing the alias in the mapping.

        The key is the storage type name, the alias and the additional cache key joined by the separator.
        """
        return self._ALIAS_STORAGE_KEY_SEPARATOR.join(
            [
                self._storage_type.__name__,
                self._alias,
                self._additional_cache_key,
            ]
        )
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
async def _get_default_kvs_client() -> KeyValueStoreClientAsync:
    """Build an API client bound to the default key-value store of the global configuration.

    Raises:
        ValueError: If the global configuration has no default key-value store id.
    """
    config = Configuration.get_global_configuration()

    # The API client is constructed before the id check, so the order of possible failures stays the same.
    api_client = ApifyClientAsync(
        token=config.token,
        api_url=config.api_base_url,
        max_retries=8,
        min_delay_between_retries_millis=500,
        timeout_secs=360,
    )

    if config.default_key_value_store_id:
        return api_client.key_value_store(key_value_store_id=config.default_key_value_store_id)

    raise ValueError("'Configuration.default_key_value_store_id' must be set.")
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def hash_api_base_url_and_token(configuration: Configuration) -> str:
    """Return a deterministic short hash of `configuration.api_public_base_url` followed by `configuration.token`.

    Raises:
        ValueError: If either of the two configuration values is None.
    """
    base_url = configuration.api_public_base_url
    token = configuration.token

    if base_url is None or token is None:
        raise ValueError("'Configuration.api_public_base_url' and 'Configuration.token' must be set.")

    # Both values are concatenated without a separator before hashing.
    return compute_short_hash(f'{base_url}{token}'.encode())
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def unique_key_to_request_id(unique_key: str, *, request_id_length: int = 15) -> str:
    """Derive a deterministic request ID from a unique key.

    The key is hashed with SHA-256, the digest is base64-encoded, the characters '+', '/' and '=' are dropped,
    and the result is cut to the requested length.

    Args:
        unique_key: The unique key to convert into a request ID.
        request_id_length: The length of the request ID.

    Returns:
        A URL-safe, truncated request ID based on the unique key.
    """
    digest = sha256(unique_key.encode('utf-8')).digest()
    encoded = b64encode(digest).decode('utf-8')

    # Strip the three base64 characters that are not URL-safe.
    stripped = encoded.translate(str.maketrans('', '', '+/='))

    return stripped[:request_id_length]
|
|
File without changes
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import json
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from typing_extensions import override
|
|
6
|
+
|
|
7
|
+
from crawlee._consts import METADATA_FILENAME
|
|
8
|
+
from crawlee.storage_clients._file_system import FileSystemKeyValueStoreClient
|
|
9
|
+
|
|
10
|
+
from apify._configuration import Configuration
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class ApifyFileSystemKeyValueStoreClient(FileSystemKeyValueStoreClient):
    """Apify-specific implementation of the `FileSystemKeyValueStoreClient`.

    The only difference is that it overrides the `purge` method to delete all files in the key-value store
    directory, except for the metadata file and the `INPUT.json` file.
    """

    @override
    async def purge(self) -> None:
        """Purges the key-value store by deleting all its contents.

        It deletes all files in the key-value store directory, except for the metadata file and
        the `INPUT.json` file. It also updates the metadata to reflect that the store has been purged.
        """
        kvs_input_key = Configuration.get_global_configuration().input_key

        # First try to find the alternative format of the input file and process it if it exists.
        # NOTE(review): this runs before the lock is taken, presumably because `set_value` acquires it - confirm.
        for file_path in self.path_to_kvs.glob('*'):
            if file_path.name == f'{kvs_input_key}.json':
                await self._process_input_json(file_path)

        # Files that survive the purge: the store metadata, the input record and the input record's metadata.
        preserved_names = {METADATA_FILENAME, kvs_input_key, f'{kvs_input_key}.{METADATA_FILENAME}'}

        async with self._lock:
            for file_path in self.path_to_kvs.glob('*'):
                if file_path.name in preserved_names:
                    continue
                if file_path.is_file():
                    await asyncio.to_thread(file_path.unlink, missing_ok=True)

            await self._update_metadata(
                update_accessed_at=True,
                update_modified_at=True,
            )

    async def _process_input_json(self, path: Path) -> None:
        """Process simple input json file to format expected by the FileSystemKeyValueStoreClient.

        The file is parsed, removed, and its content is stored through `set_value` under the file name without
        the extension (for example `INPUT.json` is stored under the key `INPUT`).

        Raises:
            OSError: If the file cannot be opened; the original error is propagated unchanged.
            json.JSONDecodeError: If the file does not contain valid JSON.
        """
        # Reading and parsing both happen in a worker thread so the event loop is not blocked by file I/O.
        input_data = await asyncio.to_thread(self._read_json_file, path)
        await asyncio.to_thread(path.unlink, missing_ok=True)
        await self.set_value(key=path.stem, value=input_data)

    @staticmethod
    def _read_json_file(path: Path) -> object:
        """Read and parse a UTF-8 encoded JSON file, closing it even when parsing fails."""
        with path.open(encoding='utf-8') as file:
            return json.load(file)
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING
|
|
4
|
+
|
|
5
|
+
from typing_extensions import override
|
|
6
|
+
|
|
7
|
+
from crawlee.configuration import Configuration
|
|
8
|
+
from crawlee.storage_clients import FileSystemStorageClient
|
|
9
|
+
|
|
10
|
+
from ._key_value_store_client import ApifyFileSystemKeyValueStoreClient
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from crawlee.storage_clients._file_system import FileSystemKeyValueStoreClient
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class ApifyFileSystemStorageClient(FileSystemStorageClient):
    """File system storage client variant used by the Apify SDK.

    It differs from `FileSystemStorageClient` only in the key-value store client it creates: an
    `ApifyFileSystemKeyValueStoreClient`, whose `purge` keeps the metadata file and the `INPUT.json` file.
    """

    @override
    async def create_kvs_client(
        self,
        *,
        id: str | None = None,
        name: str | None = None,
        alias: str | None = None,
        configuration: Configuration | None = None,
    ) -> FileSystemKeyValueStoreClient:
        # Fall back to the global configuration when the caller did not supply one.
        if not configuration:
            configuration = Configuration.get_global_configuration()

        kvs_client = await ApifyFileSystemKeyValueStoreClient.open(
            id=id,
            name=name,
            alias=alias,
            configuration=configuration,
        )
        await self._purge_if_needed(kvs_client, configuration)
        return kvs_client
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from ._storage_client import SmartApifyStorageClient
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING
|
|
4
|
+
|
|
5
|
+
from typing_extensions import override
|
|
6
|
+
|
|
7
|
+
from crawlee.storage_clients._base import DatasetClient, KeyValueStoreClient, RequestQueueClient, StorageClient
|
|
8
|
+
|
|
9
|
+
from apify._configuration import Configuration as ApifyConfiguration
|
|
10
|
+
from apify._utils import docs_group
|
|
11
|
+
from apify.storage_clients import ApifyStorageClient
|
|
12
|
+
from apify.storage_clients._file_system import ApifyFileSystemStorageClient
|
|
13
|
+
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from collections.abc import Hashable
|
|
16
|
+
|
|
17
|
+
from crawlee.configuration import Configuration as CrawleeConfiguration
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@docs_group('Storage clients')
class SmartApifyStorageClient(StorageClient):
    """SmartApifyStorageClient that delegates to cloud_storage_client or local_storage_client.

    When running on Apify platform use cloud_storage_client, else use local_storage_client. This storage client is
    designed to work specifically in Actor context.
    """

    def __init__(
        self,
        *,
        cloud_storage_client: ApifyStorageClient | None = None,
        local_storage_client: StorageClient | None = None,
    ) -> None:
        """Initialize the Apify storage client.

        Args:
            cloud_storage_client: Client used to communicate with the Apify platform storage. Either through
                `force_cloud` argument when opening storages or automatically when running on the Apify platform.
                Defaults to `ApifyStorageClient(request_queue_access='single')`.
            local_storage_client: Client used to communicate with the storage when not running on the Apify
                platform and not using `force_cloud` argument when opening storages. Defaults to
                `ApifyFileSystemStorageClient()`.
        """
        self._cloud_storage_client = cloud_storage_client or ApifyStorageClient(request_queue_access='single')
        self._local_storage_client = local_storage_client or ApifyFileSystemStorageClient()

    def __str__(self) -> str:
        return (
            f'{self.__class__.__name__}(cloud_storage_client={self._cloud_storage_client.__class__.__name__},'
            f' local_storage_client={self._local_storage_client.__class__.__name__})'
        )

    def get_suitable_storage_client(self, *, force_cloud: bool = False) -> StorageClient:
        """Get a suitable storage client based on the global configuration and the value of the force_cloud flag.

        Args:
            force_cloud: If True, return `cloud_storage_client`.

        Returns:
            The cloud client when running on the Apify platform or when `force_cloud` is set, otherwise the local
            client.

        Raises:
            RuntimeError: If `force_cloud` is set outside of the Apify platform and no token is configured.
        """
        configuration = ApifyConfiguration.get_global_configuration()

        if configuration.is_at_home:
            return self._cloud_storage_client

        if not force_cloud:
            return self._local_storage_client

        if configuration.token is None:
            raise RuntimeError(
                'In order to use the Apify cloud storage from your computer, '
                'you need to provide an Apify token using the APIFY_TOKEN environment variable.'
            )
        return self._cloud_storage_client

    @override
    def get_storage_client_cache_key(self, configuration: CrawleeConfiguration) -> Hashable:
        if ApifyConfiguration.get_global_configuration().is_at_home:
            # The cloud client is only asked for a cache key when given an Apify configuration.
            if isinstance(configuration, ApifyConfiguration):
                return self._cloud_storage_client.get_storage_client_cache_key(configuration)
            raise TypeError('Expecting ApifyConfiguration')

        return self._local_storage_client.get_storage_client_cache_key(configuration)

    @override
    async def create_dataset_client(
        self,
        *,
        id: str | None = None,
        name: str | None = None,
        alias: str | None = None,
        configuration: CrawleeConfiguration | None = None,
    ) -> DatasetClient:
        # Forward `name` unchanged; passing the id as the name would drop the caller's name.
        return await self.get_suitable_storage_client().create_dataset_client(
            id=id, name=name, alias=alias, configuration=configuration
        )

    @override
    async def create_kvs_client(
        self,
        *,
        id: str | None = None,
        name: str | None = None,
        alias: str | None = None,
        configuration: CrawleeConfiguration | None = None,
    ) -> KeyValueStoreClient:
        # Forward `name` unchanged; passing the id as the name would drop the caller's name.
        return await self.get_suitable_storage_client().create_kvs_client(
            id=id, name=name, alias=alias, configuration=configuration
        )

    @override
    async def create_rq_client(
        self,
        *,
        id: str | None = None,
        name: str | None = None,
        alias: str | None = None,
        configuration: CrawleeConfiguration | None = None,
    ) -> RequestQueueClient:
        # Forward `name` unchanged; passing the id as the name would drop the caller's name.
        return await self.get_suitable_storage_client().create_rq_client(
            id=id, name=name, alias=alias, configuration=configuration
        )
|
|
File without changes
|
apify/storages/__init__.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: apify
|
|
3
|
-
Version: 2.7.2
|
|
3
|
+
Version: 3.0.0
|
|
4
4
|
Summary: Apify SDK for Python
|
|
5
5
|
Project-URL: Apify Homepage, https://apify.com
|
|
6
6
|
Project-URL: Changelog, https://docs.apify.com/sdk/python/docs/changelog
|
|
@@ -219,27 +219,43 @@ Classifier: Environment :: Console
|
|
|
219
219
|
Classifier: Intended Audience :: Developers
|
|
220
220
|
Classifier: License :: OSI Approved :: Apache Software License
|
|
221
221
|
Classifier: Operating System :: OS Independent
|
|
222
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
223
222
|
Classifier: Programming Language :: Python :: 3.10
|
|
224
223
|
Classifier: Programming Language :: Python :: 3.11
|
|
225
224
|
Classifier: Programming Language :: Python :: 3.12
|
|
226
225
|
Classifier: Programming Language :: Python :: 3.13
|
|
227
226
|
Classifier: Topic :: Software Development :: Libraries
|
|
228
|
-
Requires-Python: >=3.9
|
|
229
|
-
Requires-Dist: apify-client<2.0.0
|
|
230
|
-
Requires-Dist: apify-shared<2.0.0
|
|
231
|
-
Requires-Dist:
|
|
227
|
+
Requires-Python: >=3.10
|
|
228
|
+
Requires-Dist: apify-client<3.0.0,>=2.0.0
|
|
229
|
+
Requires-Dist: apify-shared<3.0.0,>=2.0.0
|
|
230
|
+
Requires-Dist: cachetools>=5.5.0
|
|
231
|
+
Requires-Dist: crawlee<2.0.0,>=1.0.0
|
|
232
232
|
Requires-Dist: cryptography>=42.0.0
|
|
233
|
-
Requires-Dist:
|
|
234
|
-
Requires-Dist: lazy-object-proxy
|
|
233
|
+
Requires-Dist: impit>=0.6.1
|
|
234
|
+
Requires-Dist: lazy-object-proxy>=1.11.0
|
|
235
235
|
Requires-Dist: more-itertools>=10.2.0
|
|
236
236
|
Requires-Dist: typing-extensions>=4.1.0
|
|
237
237
|
Requires-Dist: websockets>=14.0
|
|
238
|
+
Requires-Dist: yarl>=1.18.0
|
|
238
239
|
Provides-Extra: scrapy
|
|
239
240
|
Requires-Dist: scrapy>=2.11.0; extra == 'scrapy'
|
|
240
241
|
Description-Content-Type: text/markdown
|
|
241
242
|
|
|
242
|
-
|
|
243
|
+
<h1 align=center>Apify SDK for Python</h1>
|
|
244
|
+
|
|
245
|
+
<p align=center>
|
|
246
|
+
<a href="https://badge.fury.io/py/apify" rel="nofollow">
|
|
247
|
+
<img src="https://badge.fury.io/py/apify.svg" alt="PyPI version" style="max-width: 100%;">
|
|
248
|
+
</a>
|
|
249
|
+
<a href="https://pypi.org/project/apify/" rel="nofollow">
|
|
250
|
+
<img src="https://img.shields.io/pypi/dm/apify" alt="PyPI - Downloads" style="max-width: 100%;">
|
|
251
|
+
</a>
|
|
252
|
+
<a href="https://pypi.org/project/apify/" rel="nofollow">
|
|
253
|
+
<img src="https://img.shields.io/pypi/pyversions/apify" alt="PyPI - Python Version" style="max-width: 100%;">
|
|
254
|
+
</a>
|
|
255
|
+
<a href="https://discord.gg/jyEM2PRvMU" rel="nofollow">
|
|
256
|
+
<img src="https://img.shields.io/discord/801163717915574323?label=discord" alt="Chat on discord" style="max-width: 100%;">
|
|
257
|
+
</a>
|
|
258
|
+
</p>
|
|
243
259
|
|
|
244
260
|
The Apify SDK for Python is the official library to create [Apify Actors](https://docs.apify.com/platform/actors)
|
|
245
261
|
in Python. It provides useful features like Actor lifecycle management, local storage emulation, and Actor
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
apify/__init__.py,sha256=HpgKg2FZWJuSPfDygzJ62psylhw4NN4tKFnoYUIhcd4,838
|
|
2
|
+
apify/_actor.py,sha256=DYHoyBAu6hDLs0BcTZL-IQveLK8gPTWvb6AgDnJc3EA,54755
|
|
3
|
+
apify/_charging.py,sha256=KjZ2DnEMS0Tt8ibizmmt0RwBq8FOAsD1z-hKFgdazcY,13143
|
|
4
|
+
apify/_configuration.py,sha256=gq_UfWTgcP1_0kEMLhXVg33SgSxXjShbuzoXyCFfK0w,14682
|
|
5
|
+
apify/_consts.py,sha256=CjhyEJ4Mi0lcIrzfqz8dN7nPJWGjCeBrrXQy1PZ6zRI,440
|
|
6
|
+
apify/_crypto.py,sha256=tqUs13QkemDtGzvU41pIA2HUEawpDlgzqbwKjm4I8kM,6852
|
|
7
|
+
apify/_models.py,sha256=EzU-inWeJ7T5HNVYEwnYb79W-q4OAPhtrYctfRYzpTE,7848
|
|
8
|
+
apify/_proxy_configuration.py,sha256=K9V4vG9-bAx7_a4l0zHhfbzvzopJeHek-qUJ05aQ6wI,13066
|
|
9
|
+
apify/_utils.py,sha256=og_zzRXELQmirklJWp1kSV7pwQPFfAE81UO3IZ4xBNs,2414
|
|
10
|
+
apify/log.py,sha256=Ry251sK4qPRGiFGe3DKcOZazg5OzC6RXS546Zzk8H2M,1003
|
|
11
|
+
apify/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
12
|
+
apify/events/__init__.py,sha256=fLNqlsM6AboUQrAxmb-GD1Pw6oDivN_eSAviGJtqc7c,198
|
|
13
|
+
apify/events/_apify_event_manager.py,sha256=yArFrKa4wWDZo32iwaA3F_w36VSJf1Yaj_L1opo8ncU,5917
|
|
14
|
+
apify/events/_types.py,sha256=F0BHgACqnRfmdQ9GUcpnZvPxzw2bdRr8BqbGSA4cHeQ,3050
|
|
15
|
+
apify/events/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
16
|
+
apify/request_loaders/__init__.py,sha256=SJqW0FbdZKEtAMB5kBLgqROzh3KmQc2CNEIhJpTGdPQ,356
|
|
17
|
+
apify/request_loaders/_apify_request_list.py,sha256=kurCxX2jAKzHJ5N1Co6KjIgptqgVmjR0WpT8bd6uK9A,6220
|
|
18
|
+
apify/request_loaders/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
19
|
+
apify/scrapy/__init__.py,sha256=m2a0ts_JY9xJkBy4JU5mV8PJqjA3GGKLXBFu4nl-n-A,1048
|
|
20
|
+
apify/scrapy/_actor_runner.py,sha256=rXWSnlQWGskDUH8PtLCv5SkOIx4AiVa4QbCYeCett5c,938
|
|
21
|
+
apify/scrapy/_async_thread.py,sha256=8xif_fWce7vaMLuDc-XuDzZlHbCI-NY61YXdP2P27QY,4753
|
|
22
|
+
apify/scrapy/_logging_config.py,sha256=KZttIiMXcfu_isYcvXQnWdoYFdYI2wK5AAdbhd57cp4,1819
|
|
23
|
+
apify/scrapy/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
24
|
+
apify/scrapy/requests.py,sha256=JNVsITB-4xtT8LnkQc8z_G4OcEE9CmkMUvLGqbva5ZI,6460
|
|
25
|
+
apify/scrapy/scheduler.py,sha256=hvTJ9AxLoifqZi7C8MtLcCP0ujGk4D4dgq4qbCD_zco,6098
|
|
26
|
+
apify/scrapy/utils.py,sha256=Ssfa-P9-g9XYP1suDce6dQ8ta7PfijiPoMl2iplE6Ow,2126
|
|
27
|
+
apify/scrapy/extensions/__init__.py,sha256=cVQ8CCtOsJsRP28YKZWSUsi4FBwxI-yPJRNSXPFSa_o,98
|
|
28
|
+
apify/scrapy/extensions/_httpcache.py,sha256=XIS9vFCcUtdSfeKAKnxRnI9dX_GMmX2Od8OPnOaDhQ0,8870
|
|
29
|
+
apify/scrapy/middlewares/__init__.py,sha256=tfW-d3WFWLeNEjL8fTmon6NwgD-OXx1Bw2fBdU-wPy4,114
|
|
30
|
+
apify/scrapy/middlewares/apify_proxy.py,sha256=CDAOXS3bcVDZHM3B0GvhXbxEikMIadLF_0P73WL_nI4,5550
|
|
31
|
+
apify/scrapy/middlewares/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
32
|
+
apify/scrapy/pipelines/__init__.py,sha256=GWPeLN_Zwj8vRBWtXW6DaxdB7mvyQ7Jw5Tz1ccgWlZI,119
|
|
33
|
+
apify/scrapy/pipelines/actor_dataset_push.py,sha256=XUUyznQTD-E3wYUUFt2WAOnWhbnRrY0WuedlfYfYhDI,846
|
|
34
|
+
apify/scrapy/pipelines/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
35
|
+
apify/storage_clients/__init__.py,sha256=JheTvNpVD_luQXC1KTEgtr6yVnuMEC9ajBNLCX3HuSo,358
|
|
36
|
+
apify/storage_clients/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
37
|
+
apify/storage_clients/_apify/__init__.py,sha256=mtbVDsxqWL3kx30elnh0kAn2kZ4s3BBsWa15Y5e7RMU,347
|
|
38
|
+
apify/storage_clients/_apify/_dataset_client.py,sha256=Bb3UwOaFkyuEY7tuBf8K46R4ZP_b1EaAkDOXOqwSoW8,12498
|
|
39
|
+
apify/storage_clients/_apify/_key_value_store_client.py,sha256=42dARbLX2oeOW7uYYKkDyQbEriMuh55Mxh0SqvkOEGg,10529
|
|
40
|
+
apify/storage_clients/_apify/_models.py,sha256=GEaN7Got1zIg42QPH36obHRWRDVNtzOkRuOWYRf9bFU,4572
|
|
41
|
+
apify/storage_clients/_apify/_request_queue_client.py,sha256=QXCLdTBeNW8RKWnxQOE71KOpZ_lqvqisa89eeiWwZ38,14200
|
|
42
|
+
apify/storage_clients/_apify/_request_queue_shared_client.py,sha256=CbvwcXRvfuBoy3wrQEdLX9_vKELPH_WhHQARP14audM,20709
|
|
43
|
+
apify/storage_clients/_apify/_request_queue_single_client.py,sha256=6CRSyWZPbKQJy3i2JBrojVTnhTYIB3gE0CTymYjpkZA,16958
|
|
44
|
+
apify/storage_clients/_apify/_storage_client.py,sha256=hFl_PuX1UgOydBD6pieZ0u2NWbDmZV-i0qygKdsuHt4,4873
|
|
45
|
+
apify/storage_clients/_apify/_utils.py,sha256=ywXoSM69amRokUZcshbAvQLIcSZq4L-bpYIGyeFxCGQ,7696
|
|
46
|
+
apify/storage_clients/_apify/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
47
|
+
apify/storage_clients/_file_system/__init__.py,sha256=rDbXatXV9wHKPhKTrXDzWnexhTm7sIJQWucMi-P-SD4,130
|
|
48
|
+
apify/storage_clients/_file_system/_key_value_store_client.py,sha256=fnSJ1EIOPCGfcE6e5S3Tux9VbnMVLCJjugkaQoH_9yo,2267
|
|
49
|
+
apify/storage_clients/_file_system/_storage_client.py,sha256=rcwpKYlrWzvlSA2xoxftg-EZAi_iGZ3vOCbu0C5lKDE,1396
|
|
50
|
+
apify/storage_clients/_smart_apify/__init__.py,sha256=614B2AaWY-dx6RQ6mod7VVR8gFh75-_jnq5BeDD7hSc,53
|
|
51
|
+
apify/storage_clients/_smart_apify/_storage_client.py,sha256=GCPmVe_xWAFcO2Cuej4su4i97_d33Q9Ih_Sc5xW2Wa4,4674
|
|
52
|
+
apify/storages/__init__.py,sha256=-9tEYJVabVs_eRVhUehxN58GH0UG8OfuGjGwuDieP2M,122
|
|
53
|
+
apify/storages/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
54
|
+
apify-3.0.0.dist-info/METADATA,sha256=DnqOx7gmtjiPYoIcUHCsCDbVr4sOH-3z2nZXiHxMFMo,22580
|
|
55
|
+
apify-3.0.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
56
|
+
apify-3.0.0.dist-info/licenses/LICENSE,sha256=AsFjHssKjj4LGd2ZCqXn6FBzMqcWdjQre1byPPSypVw,11355
|
|
57
|
+
apify-3.0.0.dist-info/RECORD,,
|