apify 1.7.1b1__py3-none-any.whl → 2.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of apify might be problematic. Click here for more details.
- apify/__init__.py +33 -4
- apify/_actor.py +1074 -0
- apify/_configuration.py +370 -0
- apify/_consts.py +10 -0
- apify/_crypto.py +31 -27
- apify/_models.py +117 -0
- apify/_platform_event_manager.py +231 -0
- apify/_proxy_configuration.py +320 -0
- apify/_utils.py +18 -484
- apify/apify_storage_client/__init__.py +3 -0
- apify/apify_storage_client/_apify_storage_client.py +68 -0
- apify/apify_storage_client/_dataset_client.py +190 -0
- apify/apify_storage_client/_dataset_collection_client.py +51 -0
- apify/apify_storage_client/_key_value_store_client.py +94 -0
- apify/apify_storage_client/_key_value_store_collection_client.py +51 -0
- apify/apify_storage_client/_request_queue_client.py +176 -0
- apify/apify_storage_client/_request_queue_collection_client.py +51 -0
- apify/apify_storage_client/py.typed +0 -0
- apify/log.py +22 -105
- apify/scrapy/__init__.py +11 -3
- apify/scrapy/middlewares/__init__.py +3 -1
- apify/scrapy/middlewares/apify_proxy.py +29 -27
- apify/scrapy/middlewares/py.typed +0 -0
- apify/scrapy/pipelines/__init__.py +3 -1
- apify/scrapy/pipelines/actor_dataset_push.py +6 -3
- apify/scrapy/pipelines/py.typed +0 -0
- apify/scrapy/py.typed +0 -0
- apify/scrapy/requests.py +60 -58
- apify/scrapy/scheduler.py +28 -19
- apify/scrapy/utils.py +10 -32
- apify/storages/__init__.py +4 -10
- apify/storages/_request_list.py +150 -0
- apify/storages/py.typed +0 -0
- apify-2.2.1.dist-info/METADATA +211 -0
- apify-2.2.1.dist-info/RECORD +38 -0
- {apify-1.7.1b1.dist-info → apify-2.2.1.dist-info}/WHEEL +1 -2
- apify/_memory_storage/__init__.py +0 -3
- apify/_memory_storage/file_storage_utils.py +0 -71
- apify/_memory_storage/memory_storage_client.py +0 -219
- apify/_memory_storage/resource_clients/__init__.py +0 -19
- apify/_memory_storage/resource_clients/base_resource_client.py +0 -141
- apify/_memory_storage/resource_clients/base_resource_collection_client.py +0 -114
- apify/_memory_storage/resource_clients/dataset.py +0 -452
- apify/_memory_storage/resource_clients/dataset_collection.py +0 -48
- apify/_memory_storage/resource_clients/key_value_store.py +0 -533
- apify/_memory_storage/resource_clients/key_value_store_collection.py +0 -48
- apify/_memory_storage/resource_clients/request_queue.py +0 -466
- apify/_memory_storage/resource_clients/request_queue_collection.py +0 -48
- apify/actor.py +0 -1351
- apify/config.py +0 -127
- apify/consts.py +0 -67
- apify/event_manager.py +0 -236
- apify/proxy_configuration.py +0 -365
- apify/storages/base_storage.py +0 -181
- apify/storages/dataset.py +0 -494
- apify/storages/key_value_store.py +0 -257
- apify/storages/request_queue.py +0 -602
- apify/storages/storage_client_manager.py +0 -72
- apify-1.7.1b1.dist-info/METADATA +0 -149
- apify-1.7.1b1.dist-info/RECORD +0 -41
- apify-1.7.1b1.dist-info/top_level.txt +0 -1
- {apify-1.7.1b1.dist-info → apify-2.2.1.dist-info}/LICENSE +0 -0
|
@@ -1,257 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
from typing import TYPE_CHECKING, Any, AsyncIterator, NamedTuple, TypedDict, TypeVar, overload
|
|
4
|
-
|
|
5
|
-
from apify_client.clients import KeyValueStoreClientAsync, KeyValueStoreCollectionClientAsync
|
|
6
|
-
from apify_shared.utils import ignore_docs
|
|
7
|
-
|
|
8
|
-
from apify._utils import wrap_internal
|
|
9
|
-
from apify.storages.base_storage import BaseStorage
|
|
10
|
-
|
|
11
|
-
if TYPE_CHECKING:
|
|
12
|
-
from apify_client import ApifyClientAsync
|
|
13
|
-
|
|
14
|
-
from apify._memory_storage import MemoryStorageClient
|
|
15
|
-
from apify._memory_storage.resource_clients import KeyValueStoreClient, KeyValueStoreCollectionClient
|
|
16
|
-
from apify.config import Configuration
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
# Type variable linking the `default_value` argument of `KeyValueStore.get_value` to its return type.
T = TypeVar('T')
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
class IterateKeysInfo(TypedDict):
    """Metadata describing a single record of a key-value store.

    Instances are plain dictionaries; `KeyValueStore.iterate_keys` yields one per key
    as the `info` member of an `IterateKeysTuple`.
    """

    # Size of the record, copied from the `size` field of the key listing.
    size: int
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
class IterateKeysTuple(NamedTuple):
    """A `(key, info)` pair describing one record of a key-value store.

    This is the item type yielded by `KeyValueStore.iterate_keys`.
    """

    # Key under which the record is stored.
    key: str
    # Additional information about the record (currently only its size).
    info: IterateKeysInfo
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
class KeyValueStore(BaseStorage):
    """Storage of data records or files, each addressed by a unique key.

    Every record is identified by its key and is associated with a MIME content type.

    Instances are not meant to be created by hand, use the `Actor.open_key_value_store()` function instead.

    A default key-value store is created exclusively for each crawler run. By convention, the crawler input
    is kept under the `INPUT` key and the crawler output under the `OUTPUT` key of that default store.
    Both are typically JSON files, although any other format can be used. The `KeyValueStore.get_value`
    and `KeyValueStore.set_value` convenience functions give direct access to the default key-value store.

    The data is kept either on the local disk or in the Apify cloud, depending on whether
    the `APIFY_LOCAL_STORAGE_DIR` or `APIFY_TOKEN` environment variables are set.

    When `APIFY_LOCAL_STORAGE_DIR` is set, the records are stored in the local directory in the following files:
    ```
    {APIFY_LOCAL_STORAGE_DIR}/key_value_stores/{STORE_ID}/{KEY}.{EXT}
    ```
    `{STORE_ID}` stands for the name or ID of the key-value store. The default key-value store has ID `default`,
    unless it is overridden by the `APIFY_DEFAULT_KEY_VALUE_STORE_ID` environment variable.
    `{KEY}` is the key of the record and `{EXT}` corresponds to the MIME content type of the stored value.

    When `APIFY_TOKEN` is set and `APIFY_LOCAL_STORAGE_DIR` is not, the data is stored in the
    [Apify Key-value store](https://docs.apify.com/storage/key-value-store) cloud storage.
    """

    # ID of the key-value store this instance is bound to.
    _id: str
    # Optional name of the key-value store.
    _name: str | None
    # Resource client (Apify API client or memory-storage client) used for all record operations.
    _key_value_store_client: KeyValueStoreClientAsync | KeyValueStoreClient

    @ignore_docs
    def __init__(
        self: KeyValueStore,
        id: str,  # noqa: A002
        name: str | None,
        client: ApifyClientAsync | MemoryStorageClient,
        config: Configuration,
    ) -> None:
        """Initialize a `KeyValueStore` bound to one particular store.

        This constructor is not part of the public API, use the `Actor.open_key_value_store()` function instead.

        Args:
            id (str): ID of the key-value store.
            name (str, optional): Name of the key-value store.
            client (ApifyClientAsync or MemoryStorageClient): The storage client which should be used.
            config (Configuration): The configuration which should be used.
        """
        super().__init__(id=id, name=name, client=client, config=config)

        # On the class, each of these names is a classmethod working with the default store.
        # On an instance it is shadowed by whatever `wrap_internal` builds from the matching
        # `_<name>_internal` coroutine and the public classmethod (presumably a wrapper around
        # the internal implementation - confirm against `apify._utils.wrap_internal`).
        for public_name in ('get_value', 'set_value', 'get_public_url'):
            internal_impl = getattr(self, f'_{public_name}_internal')
            public_method = getattr(self, public_name)
            setattr(self, public_name, wrap_internal(internal_impl, public_method))

        self._id = id
        self._name = name
        self._key_value_store_client = client.key_value_store(self._id)

    @classmethod
    def _get_human_friendly_label(cls: type[KeyValueStore]) -> str:
        """Return the human-readable label of this storage type."""
        return 'Key-value store'

    @classmethod
    def _get_default_id(cls: type[KeyValueStore], config: Configuration) -> str:
        """Return the ID of the default key-value store as given by `config`."""
        return config.default_key_value_store_id

    @classmethod
    def _get_single_storage_client(
        cls: type[KeyValueStore],
        id: str,  # noqa: A002
        client: ApifyClientAsync | MemoryStorageClient,
    ) -> KeyValueStoreClientAsync | KeyValueStoreClient:
        """Return the resource client of `client` for the key-value store with the given ID."""
        return client.key_value_store(id)

    @classmethod
    def _get_storage_collection_client(
        cls: type[KeyValueStore],
        client: ApifyClientAsync | MemoryStorageClient,
    ) -> KeyValueStoreCollectionClientAsync | KeyValueStoreCollectionClient:
        """Return the resource client of `client` for the collection of key-value stores."""
        return client.key_value_stores()

    @overload
    @classmethod
    async def get_value(cls: type[KeyValueStore], key: str) -> Any: ...

    @overload
    @classmethod
    async def get_value(cls: type[KeyValueStore], key: str, default_value: T) -> T: ...

    @overload
    @classmethod
    async def get_value(cls: type[KeyValueStore], key: str, default_value: T | None = None) -> T | None: ...

    @classmethod
    async def get_value(cls: type[KeyValueStore], key: str, default_value: T | None = None) -> T | None:
        """Read a value from the key-value store.

        Args:
            key (str): Key of the record to retrieve.
            default_value (Any, optional): Value to return when the record does not exist.

        Returns:
            Any: The value stored under the given key, or `default_value` when there is no such record.
        """
        # Opening without `id` or `name` gives the default store; the call is then made on that instance.
        default_store = await cls.open()
        return await default_store.get_value(key, default_value)

    async def _get_value_internal(self: KeyValueStore, key: str, default_value: T | None = None) -> T | None:
        """Fetch the record stored under `key` from this store, falling back to `default_value`."""
        record = await self._key_value_store_client.get_record(key)
        if not record:
            return default_value
        return record['value']

    async def iterate_keys(
        self: KeyValueStore,
        exclusive_start_key: str | None = None,
    ) -> AsyncIterator[IterateKeysTuple]:
        """Iterate over the keys in the key-value store.

        Args:
            exclusive_start_key (str, optional): All keys up to this one (including) are skipped from the result.

        Yields:
            IterateKeysTuple: A tuple `(key, info)`,
                where `key` is the record key, and `info` is an object that contains a single property `size`
                indicating size of the record in bytes.
        """
        next_start_key = exclusive_start_key
        has_more_pages = True
        while has_more_pages:
            page = await self._key_value_store_client.list_keys(exclusive_start_key=next_start_key)
            for entry in page['items']:
                yield IterateKeysTuple(entry['key'], {'size': entry['size']})

            # The continuation key is only read when the listing reports that it was truncated.
            has_more_pages = bool(page['isTruncated'])
            if has_more_pages:
                next_start_key = page['nextExclusiveStartKey']

    @classmethod
    async def set_value(
        cls: type[KeyValueStore],
        key: str,
        value: Any,
        content_type: str | None = None,
    ) -> None:
        """Store a value in the key-value store, or delete the record when the value is `None`.

        Args:
            key (str): The key under which the value should be saved.
            value (Any): The value to save. If the value is `None`, the corresponding key-value pair will be deleted.
            content_type (str, optional): The content type of the saved value.
        """
        default_store = await cls.open()
        return await default_store.set_value(key, value, content_type)

    async def _set_value_internal(
        self: KeyValueStore,
        key: str,
        value: Any,
        content_type: str | None = None,
    ) -> None:
        """Save `value` under `key` in this store; a `None` value deletes the record instead."""
        records = self._key_value_store_client
        if value is not None:
            return await records.set_record(key, value, content_type)

        return await records.delete_record(key)

    @classmethod
    async def get_public_url(cls: type[KeyValueStore], key: str) -> str:
        """Build a URL under which the value of the given key can be publicly accessed in the remote key-value store.

        Args:
            key (str): The key for which the URL should be generated.
        """
        default_store = await cls.open()
        return await default_store.get_public_url(key)

    async def _get_public_url_internal(self: KeyValueStore, key: str) -> str:
        """Compose the public record URL for `key` in this store.

        Raises:
            RuntimeError: If the store is not backed by `KeyValueStoreClientAsync`.
        """
        if isinstance(self._key_value_store_client, KeyValueStoreClientAsync):
            base_url = self._config.api_public_base_url
            return f'{base_url}/v2/key-value-stores/{self._id}/records/{key}'

        raise RuntimeError('Cannot generate a public URL for this key-value store as it is not on the Apify Platform!')  # noqa: TRY004

    async def drop(self: KeyValueStore) -> None:
        """Remove the key-value store either from the Apify cloud storage or from the local directory."""
        # Delete the underlying storage first, then forget the instance via the base-class cache helper.
        await self._key_value_store_client.delete()
        self._remove_from_cache()

    @classmethod
    async def open(
        cls: type[KeyValueStore],
        *,
        id: str | None = None,  # noqa: A002
        name: str | None = None,
        force_cloud: bool = False,
        config: Configuration | None = None,
    ) -> KeyValueStore:
        """Open a key-value store.

        A key-value store holds records or files together with their MIME content type.
        Each record is saved and looked up by its unique key.
        The data itself lives either on a local filesystem or in the Apify cloud.

        Args:
            id (str, optional): ID of the key-value store to be opened.
                If neither `id` nor `name` are provided, the method returns the default key-value store associated with the actor run.
                If the key-value store with the given ID does not exist, it raises an error.
            name (str, optional): Name of the key-value store to be opened.
                If neither `id` nor `name` are provided, the method returns the default key-value store associated with the actor run.
                If the key-value store with the given name does not exist, it is created.
            force_cloud (bool, optional): If set to True, it will open a key-value store on the Apify Platform even when running the actor locally.
                Defaults to False.
            config (Configuration, optional): A `Configuration` instance, uses global configuration if omitted.

        Returns:
            KeyValueStore: An instance of the `KeyValueStore` class for the given ID or name.
        """
        # All the work is done by the base-class implementation; this override only narrows the return type.
        return await super().open(id=id, name=name, force_cloud=force_cloud, config=config)  # type: ignore
|