apify 1.7.1b1__py3-none-any.whl → 2.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of apify might be problematic. Click here for more details.

Files changed (62)
  1. apify/__init__.py +33 -4
  2. apify/_actor.py +1074 -0
  3. apify/_configuration.py +370 -0
  4. apify/_consts.py +10 -0
  5. apify/_crypto.py +31 -27
  6. apify/_models.py +117 -0
  7. apify/_platform_event_manager.py +231 -0
  8. apify/_proxy_configuration.py +320 -0
  9. apify/_utils.py +18 -484
  10. apify/apify_storage_client/__init__.py +3 -0
  11. apify/apify_storage_client/_apify_storage_client.py +68 -0
  12. apify/apify_storage_client/_dataset_client.py +190 -0
  13. apify/apify_storage_client/_dataset_collection_client.py +51 -0
  14. apify/apify_storage_client/_key_value_store_client.py +94 -0
  15. apify/apify_storage_client/_key_value_store_collection_client.py +51 -0
  16. apify/apify_storage_client/_request_queue_client.py +176 -0
  17. apify/apify_storage_client/_request_queue_collection_client.py +51 -0
  18. apify/apify_storage_client/py.typed +0 -0
  19. apify/log.py +22 -105
  20. apify/scrapy/__init__.py +11 -3
  21. apify/scrapy/middlewares/__init__.py +3 -1
  22. apify/scrapy/middlewares/apify_proxy.py +29 -27
  23. apify/scrapy/middlewares/py.typed +0 -0
  24. apify/scrapy/pipelines/__init__.py +3 -1
  25. apify/scrapy/pipelines/actor_dataset_push.py +6 -3
  26. apify/scrapy/pipelines/py.typed +0 -0
  27. apify/scrapy/py.typed +0 -0
  28. apify/scrapy/requests.py +60 -58
  29. apify/scrapy/scheduler.py +28 -19
  30. apify/scrapy/utils.py +10 -32
  31. apify/storages/__init__.py +4 -10
  32. apify/storages/_request_list.py +150 -0
  33. apify/storages/py.typed +0 -0
  34. apify-2.2.1.dist-info/METADATA +211 -0
  35. apify-2.2.1.dist-info/RECORD +38 -0
  36. {apify-1.7.1b1.dist-info → apify-2.2.1.dist-info}/WHEEL +1 -2
  37. apify/_memory_storage/__init__.py +0 -3
  38. apify/_memory_storage/file_storage_utils.py +0 -71
  39. apify/_memory_storage/memory_storage_client.py +0 -219
  40. apify/_memory_storage/resource_clients/__init__.py +0 -19
  41. apify/_memory_storage/resource_clients/base_resource_client.py +0 -141
  42. apify/_memory_storage/resource_clients/base_resource_collection_client.py +0 -114
  43. apify/_memory_storage/resource_clients/dataset.py +0 -452
  44. apify/_memory_storage/resource_clients/dataset_collection.py +0 -48
  45. apify/_memory_storage/resource_clients/key_value_store.py +0 -533
  46. apify/_memory_storage/resource_clients/key_value_store_collection.py +0 -48
  47. apify/_memory_storage/resource_clients/request_queue.py +0 -466
  48. apify/_memory_storage/resource_clients/request_queue_collection.py +0 -48
  49. apify/actor.py +0 -1351
  50. apify/config.py +0 -127
  51. apify/consts.py +0 -67
  52. apify/event_manager.py +0 -236
  53. apify/proxy_configuration.py +0 -365
  54. apify/storages/base_storage.py +0 -181
  55. apify/storages/dataset.py +0 -494
  56. apify/storages/key_value_store.py +0 -257
  57. apify/storages/request_queue.py +0 -602
  58. apify/storages/storage_client_manager.py +0 -72
  59. apify-1.7.1b1.dist-info/METADATA +0 -149
  60. apify-1.7.1b1.dist-info/RECORD +0 -41
  61. apify-1.7.1b1.dist-info/top_level.txt +0 -1
  62. {apify-1.7.1b1.dist-info → apify-2.2.1.dist-info}/LICENSE +0 -0
apify/storages/key_value_store.py +0 -257
@@ -1,257 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from typing import TYPE_CHECKING, Any, AsyncIterator, NamedTuple, TypedDict, TypeVar, overload
4
-
5
- from apify_client.clients import KeyValueStoreClientAsync, KeyValueStoreCollectionClientAsync
6
- from apify_shared.utils import ignore_docs
7
-
8
- from apify._utils import wrap_internal
9
- from apify.storages.base_storage import BaseStorage
10
-
11
- if TYPE_CHECKING:
12
- from apify_client import ApifyClientAsync
13
-
14
- from apify._memory_storage import MemoryStorageClient
15
- from apify._memory_storage.resource_clients import KeyValueStoreClient, KeyValueStoreCollectionClient
16
- from apify.config import Configuration
17
-
18
-
19
- T = TypeVar('T')
20
-
21
-
22
- class IterateKeysInfo(TypedDict):
23
- """Contains information about a key-value store record."""
24
-
25
- size: int
26
-
27
-
28
- class IterateKeysTuple(NamedTuple):
29
- """A tuple representing a key-value store record."""
30
-
31
- key: str
32
- info: IterateKeysInfo
33
-
34
-
35
- class KeyValueStore(BaseStorage):
36
- """The `KeyValueStore` class represents a key-value store.
37
-
38
- You can imagine it as a simple data storage that is used
39
- for saving and reading data records or files. Each data record is
40
- represented by a unique key and associated with a MIME content type.
41
-
42
- Do not instantiate this class directly, use the `Actor.open_key_value_store()` function instead.
43
-
44
- Each crawler run is associated with a default key-value store, which is created exclusively
45
- for the run. By convention, the crawler input and output are stored into the
46
- default key-value store under the `INPUT` and `OUTPUT` key, respectively.
47
- Typically, input and output are JSON files, although it can be any other format.
48
- To access the default key-value store directly, you can use the
49
- `KeyValueStore.get_value` and `KeyValueStore.set_value` convenience functions.
50
-
51
- `KeyValueStore` stores its data either on local disk or in the Apify cloud,
52
- depending on whether the `APIFY_LOCAL_STORAGE_DIR` or `APIFY_TOKEN` environment variables are set.
53
-
54
- If the `APIFY_LOCAL_STORAGE_DIR` environment variable is set, the data is stored in
55
- the local directory in the following files:
56
- ```
57
- {APIFY_LOCAL_STORAGE_DIR}/key_value_stores/{STORE_ID}/{INDEX}.{EXT}
58
- ```
59
- Note that `{STORE_ID}` is the name or ID of the key-value store. The default key-value store has ID: `default`,
60
- unless you override it by setting the `APIFY_DEFAULT_KEY_VALUE_STORE_ID` environment variable.
61
- The `{KEY}` is the key of the record and `{EXT}` corresponds to the MIME content type of the data value.
62
-
63
- If the `APIFY_TOKEN` environment variable is set but `APIFY_LOCAL_STORAGE_DIR` is not, the data is stored in the
64
- [Apify Key-value store](https://docs.apify.com/storage/key-value-store) cloud storage.
65
- """
66
-
67
- _id: str
68
- _name: str | None
69
- _key_value_store_client: KeyValueStoreClientAsync | KeyValueStoreClient
70
-
71
- @ignore_docs
72
- def __init__(
73
- self: KeyValueStore,
74
- id: str, # noqa: A002
75
- name: str | None,
76
- client: ApifyClientAsync | MemoryStorageClient,
77
- config: Configuration,
78
- ) -> None:
79
- """Create a `KeyValueStore` instance.
80
-
81
- Do not use the constructor directly, use the `Actor.open_key_value_store()` function instead.
82
-
83
- Args:
84
- id (str): ID of the key-value store.
85
- name (str, optional): Name of the key-value store.
86
- client (ApifyClientAsync or MemoryStorageClient): The storage client which should be used.
87
- config (Configuration): The configuration which should be used.
88
- """
89
- super().__init__(id=id, name=name, client=client, config=config)
90
-
91
- self.get_value = wrap_internal(self._get_value_internal, self.get_value) # type: ignore
92
- self.set_value = wrap_internal(self._set_value_internal, self.set_value) # type: ignore
93
- self.get_public_url = wrap_internal(self._get_public_url_internal, self.get_public_url) # type: ignore
94
- self._id = id
95
- self._name = name
96
- self._key_value_store_client = client.key_value_store(self._id)
97
-
98
- @classmethod
99
- def _get_human_friendly_label(cls: type[KeyValueStore]) -> str:
100
- return 'Key-value store'
101
-
102
- @classmethod
103
- def _get_default_id(cls: type[KeyValueStore], config: Configuration) -> str:
104
- return config.default_key_value_store_id
105
-
106
- @classmethod
107
- def _get_single_storage_client(
108
- cls: type[KeyValueStore],
109
- id: str, # noqa: A002
110
- client: ApifyClientAsync | MemoryStorageClient,
111
- ) -> KeyValueStoreClientAsync | KeyValueStoreClient:
112
- return client.key_value_store(id)
113
-
114
- @classmethod
115
- def _get_storage_collection_client(
116
- cls: type[KeyValueStore],
117
- client: ApifyClientAsync | MemoryStorageClient,
118
- ) -> KeyValueStoreCollectionClientAsync | KeyValueStoreCollectionClient:
119
- return client.key_value_stores()
120
-
121
- @overload
122
- @classmethod
123
- async def get_value(cls: type[KeyValueStore], key: str) -> Any:
124
- ...
125
-
126
- @overload
127
- @classmethod
128
- async def get_value(cls: type[KeyValueStore], key: str, default_value: T) -> T:
129
- ...
130
-
131
- @overload
132
- @classmethod
133
- async def get_value(cls: type[KeyValueStore], key: str, default_value: T | None = None) -> T | None:
134
- ...
135
-
136
- @classmethod
137
- async def get_value(cls: type[KeyValueStore], key: str, default_value: T | None = None) -> T | None:
138
- """Get a value from the key-value store.
139
-
140
- Args:
141
- key (str): Key of the record to retrieve.
142
- default_value (Any, optional): Default value returned in case the record does not exist.
143
-
144
- Returns:
145
- Any: The value associated with the given key. `default_value` is used in case the record does not exist.
146
- """
147
- store = await cls.open()
148
- return await store.get_value(key, default_value)
149
-
150
- async def _get_value_internal(self: KeyValueStore, key: str, default_value: T | None = None) -> T | None:
151
- record = await self._key_value_store_client.get_record(key)
152
- return record['value'] if record else default_value
153
-
154
- async def iterate_keys(
155
- self: KeyValueStore,
156
- exclusive_start_key: str | None = None,
157
- ) -> AsyncIterator[IterateKeysTuple]:
158
- """Iterate over the keys in the key-value store.
159
-
160
- Args:
161
- exclusive_start_key (str, optional): All keys up to this one (including) are skipped from the result.
162
-
163
- Yields:
164
- IterateKeysTuple: A tuple `(key, info)`,
165
- where `key` is the record key, and `info` is an object that contains a single property `size`
166
- indicating size of the record in bytes.
167
- """
168
- while True:
169
- list_keys = await self._key_value_store_client.list_keys(exclusive_start_key=exclusive_start_key)
170
- for item in list_keys['items']:
171
- yield IterateKeysTuple(item['key'], {'size': item['size']})
172
-
173
- if not list_keys['isTruncated']:
174
- break
175
- exclusive_start_key = list_keys['nextExclusiveStartKey']
176
-
177
- @classmethod
178
- async def set_value(
179
- cls: type[KeyValueStore],
180
- key: str,
181
- value: Any,
182
- content_type: str | None = None,
183
- ) -> None:
184
- """Set or delete a value in the key-value store.
185
-
186
- Args:
187
- key (str): The key under which the value should be saved.
188
- value (Any): The value to save. If the value is `None`, the corresponding key-value pair will be deleted.
189
- content_type (str, optional): The content type of the saved value.
190
- """
191
- store = await cls.open()
192
- return await store.set_value(key, value, content_type)
193
-
194
- async def _set_value_internal(
195
- self: KeyValueStore,
196
- key: str,
197
- value: Any,
198
- content_type: str | None = None,
199
- ) -> None:
200
- if value is None:
201
- return await self._key_value_store_client.delete_record(key)
202
-
203
- return await self._key_value_store_client.set_record(key, value, content_type)
204
-
205
- @classmethod
206
- async def get_public_url(cls: type[KeyValueStore], key: str) -> str:
207
- """Get a URL for the given key that may be used to publicly access the value in the remote key-value store.
208
-
209
- Args:
210
- key (str): The key for which the URL should be generated.
211
- """
212
- store = await cls.open()
213
- return await store.get_public_url(key)
214
-
215
- async def _get_public_url_internal(self: KeyValueStore, key: str) -> str:
216
- if not isinstance(self._key_value_store_client, KeyValueStoreClientAsync):
217
- raise RuntimeError('Cannot generate a public URL for this key-value store as it is not on the Apify Platform!') # noqa: TRY004
218
-
219
- public_api_url = self._config.api_public_base_url
220
-
221
- return f'{public_api_url}/v2/key-value-stores/{self._id}/records/{key}'
222
-
223
- async def drop(self: KeyValueStore) -> None:
224
- """Remove the key-value store either from the Apify cloud storage or from the local directory."""
225
- await self._key_value_store_client.delete()
226
- self._remove_from_cache()
227
-
228
- @classmethod
229
- async def open(
230
- cls: type[KeyValueStore],
231
- *,
232
- id: str | None = None, # noqa: A002
233
- name: str | None = None,
234
- force_cloud: bool = False,
235
- config: Configuration | None = None,
236
- ) -> KeyValueStore:
237
- """Open a key-value store.
238
-
239
- Key-value stores are used to store records or files, along with their MIME content type.
240
- The records are stored and retrieved using a unique key.
241
- The actual data is stored either on a local filesystem or in the Apify cloud.
242
-
243
- Args:
244
- id (str, optional): ID of the key-value store to be opened.
245
- If neither `id` nor `name` are provided, the method returns the default key-value store associated with the actor run.
246
- If the key-value store with the given ID does not exist, it raises an error.
247
- name (str, optional): Name of the key-value store to be opened.
248
- If neither `id` nor `name` are provided, the method returns the default key-value store associated with the actor run.
249
- If the key-value store with the given name does not exist, it is created.
250
- force_cloud (bool, optional): If set to True, it will open a key-value store on the Apify Platform even when running the actor locally.
251
- Defaults to False.
252
- config (Configuration, optional): A `Configuration` instance, uses global configuration if omitted.
253
-
254
- Returns:
255
- KeyValueStore: An instance of the `KeyValueStore` class for the given ID or name.
256
- """
257
- return await super().open(id=id, name=name, force_cloud=force_cloud, config=config) # type: ignore