apify 2.7.3__py3-none-any.whl → 3.0.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of apify has been flagged as potentially problematic.

Files changed (46)
  1. apify/_actor.py +47 -12
  2. apify/_charging.py +15 -9
  3. apify/_configuration.py +34 -1
  4. apify/_crypto.py +0 -6
  5. apify/_models.py +7 -7
  6. apify/_proxy_configuration.py +10 -10
  7. apify/_utils.py +25 -2
  8. apify/events/__init__.py +5 -0
  9. apify/events/_apify_event_manager.py +140 -0
  10. apify/events/_types.py +102 -0
  11. apify/log.py +0 -9
  12. apify/request_loaders/__init__.py +18 -0
  13. apify/{storages/_request_list.py → request_loaders/_apify_request_list.py} +25 -18
  14. apify/request_loaders/py.typed +0 -0
  15. apify/scrapy/_logging_config.py +1 -4
  16. apify/scrapy/extensions/_httpcache.py +9 -5
  17. apify/scrapy/requests.py +3 -3
  18. apify/scrapy/scheduler.py +8 -5
  19. apify/storage_clients/__init__.py +10 -0
  20. apify/storage_clients/_apify/__init__.py +11 -0
  21. apify/storage_clients/_apify/_dataset_client.py +304 -0
  22. apify/storage_clients/_apify/_key_value_store_client.py +241 -0
  23. apify/storage_clients/_apify/_models.py +107 -0
  24. apify/storage_clients/_apify/_request_queue_client.py +787 -0
  25. apify/storage_clients/_apify/_storage_client.py +80 -0
  26. apify/storage_clients/_apify/py.typed +0 -0
  27. apify/storage_clients/_file_system/__init__.py +2 -0
  28. apify/storage_clients/_file_system/_key_value_store_client.py +36 -0
  29. apify/storage_clients/_file_system/_storage_client.py +35 -0
  30. apify/storage_clients/py.typed +0 -0
  31. apify/storages/__init__.py +1 -3
  32. {apify-2.7.3.dist-info → apify-3.0.0rc1.dist-info}/METADATA +8 -7
  33. apify-3.0.0rc1.dist-info/RECORD +52 -0
  34. apify/_platform_event_manager.py +0 -231
  35. apify/apify_storage_client/__init__.py +0 -3
  36. apify/apify_storage_client/_apify_storage_client.py +0 -72
  37. apify/apify_storage_client/_dataset_client.py +0 -190
  38. apify/apify_storage_client/_dataset_collection_client.py +0 -51
  39. apify/apify_storage_client/_key_value_store_client.py +0 -109
  40. apify/apify_storage_client/_key_value_store_collection_client.py +0 -51
  41. apify/apify_storage_client/_request_queue_client.py +0 -176
  42. apify/apify_storage_client/_request_queue_collection_client.py +0 -51
  43. apify-2.7.3.dist-info/RECORD +0 -44
  44. /apify/{apify_storage_client → events}/py.typed +0 -0
  45. {apify-2.7.3.dist-info → apify-3.0.0rc1.dist-info}/WHEEL +0 -0
  46. {apify-2.7.3.dist-info → apify-3.0.0rc1.dist-info}/licenses/LICENSE +0 -0
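
The listing shows the old `apify.apify_storage_client` package (items 35-42) removed in favor of the new `apify.storage_clients` package (items 19-30), with event handling split out into `apify.events` and request lists moved to `apify.request_loaders`. As a rough migration sketch, an import would presumably change as follows; the re-exported names here are an assumption inferred from the file layout, not something this diff confirms:

# Hypothetical migration sketch; the public export names are assumed from the file layout above.

# apify 2.x:
# from apify.apify_storage_client import ApifyStorageClient

# apify 3.0.0rc1:
from apify.storage_clients import ApifyStorageClient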
apify/storage_clients/_apify/_dataset_client.py
@@ -0,0 +1,304 @@
+ from __future__ import annotations
+
+ import asyncio
+ from logging import getLogger
+ from typing import TYPE_CHECKING, Any
+
+ from typing_extensions import override
+
+ from apify_client import ApifyClientAsync
+ from crawlee._utils.byte_size import ByteSize
+ from crawlee._utils.file import json_dumps
+ from crawlee.storage_clients._base import DatasetClient
+ from crawlee.storage_clients.models import DatasetItemsListPage, DatasetMetadata
+
+ if TYPE_CHECKING:
+     from collections.abc import AsyncIterator
+
+     from apify_client.clients import DatasetClientAsync
+     from crawlee._types import JsonSerializable
+
+     from apify import Configuration
+
+ logger = getLogger(__name__)
+
+
+ class ApifyDatasetClient(DatasetClient):
+     """An Apify platform implementation of the dataset client."""
+
+     _MAX_PAYLOAD_SIZE = ByteSize.from_mb(9)
+     """Maximum size for a single payload."""
+
+     _SAFETY_BUFFER_COEFFICIENT = 0.01 / 100  # 0.01%
+     """Percentage buffer to reduce the payload limit slightly for safety."""
+
+     _EFFECTIVE_LIMIT_SIZE = _MAX_PAYLOAD_SIZE - (_MAX_PAYLOAD_SIZE * _SAFETY_BUFFER_COEFFICIENT)
+     """Calculated payload limit considering the safety buffer."""
+
+     def __init__(
+         self,
+         *,
+         api_client: DatasetClientAsync,
+         api_public_base_url: str,
+         lock: asyncio.Lock,
+     ) -> None:
+         """Initialize a new instance.
+
+         Preferably use the `ApifyDatasetClient.open` class method to create a new instance.
+         """
+         self._api_client = api_client
+         """The Apify dataset client for API operations."""
+
+         self._api_public_base_url = api_public_base_url
+         """The public base URL for accessing the dataset records."""
+
+         self._lock = lock
+         """A lock to ensure that only one operation is performed at a time."""
+
+     @override
+     async def get_metadata(self) -> DatasetMetadata:
+         metadata = await self._api_client.get()
+         return DatasetMetadata.model_validate(metadata)
+
+     @classmethod
+     async def open(
+         cls,
+         *,
+         id: str | None,
+         name: str | None,
+         configuration: Configuration,
+     ) -> ApifyDatasetClient:
+         """Open an Apify dataset client.
+
+         This method creates and initializes a new instance of the Apify dataset client.
+         It handles authentication, storage lookup/creation, and metadata retrieval.
+
+         Args:
+             id: The ID of an existing dataset to open. If provided, the client will connect to this specific
+                 storage. Cannot be used together with `name`.
+             name: The name of a dataset to get or create. If a storage with this name exists, it will be opened;
+                 otherwise, a new one will be created. Cannot be used together with `id`.
+             configuration: The configuration object containing API credentials and settings. Must include a valid
+                 `token` and `api_base_url`. May also contain a `default_dataset_id` for fallback when neither
+                 `id` nor `name` is provided.
+
+         Returns:
+             An instance for the opened or created storage client.
+
+         Raises:
+             ValueError: If the configuration is missing required fields (token, api_base_url), if both `id` and
+                 `name` are provided, or if neither `id` nor `name` is provided and no default storage ID is
+                 available in the configuration.
+         """
+         token = configuration.token
+         if not token:
+             raise ValueError(f'Apify storage client requires a valid token in Configuration (token={token}).')
+
+         api_url = configuration.api_base_url
+         if not api_url:
+             raise ValueError(f'Apify storage client requires a valid API URL in Configuration (api_url={api_url}).')
+
+         api_public_base_url = configuration.api_public_base_url
+         if not api_public_base_url:
+             raise ValueError(
+                 'Apify storage client requires a valid API public base URL in Configuration '
+                 f'(api_public_base_url={api_public_base_url}).'
+             )
+
+         # Create an Apify client with the provided token and API URL.
+         apify_client_async = ApifyClientAsync(
+             token=token,
+             api_url=api_url,
+             max_retries=8,
+             min_delay_between_retries_millis=500,
+             timeout_secs=360,
+         )
+         apify_datasets_client = apify_client_async.datasets()
+
+         # If both id and name are provided, raise an error.
+         if id and name:
+             raise ValueError('Only one of "id" or "name" can be specified, not both.')
+
+         # If id is provided, get the storage by ID.
+         if id and name is None:
+             apify_dataset_client = apify_client_async.dataset(dataset_id=id)
+
+         # If name is provided, get or create the storage by name.
+         if name and id is None:
+             id = DatasetMetadata.model_validate(
+                 await apify_datasets_client.get_or_create(name=name),
+             ).id
+             apify_dataset_client = apify_client_async.dataset(dataset_id=id)
+
+         # If both id and name are None, try to get the default storage ID from environment variables.
+         # The default storage ID environment variable is set by the Apify platform. It also contains
+         # a new storage ID after an Actor reboot or migration.
+         if id is None and name is None:
+             id = configuration.default_dataset_id
+             apify_dataset_client = apify_client_async.dataset(dataset_id=id)
+
+         # Fetch its metadata.
+         metadata = await apify_dataset_client.get()
+
+         # If metadata is None, the storage does not exist, so create it.
+         if metadata is None:
+             id = DatasetMetadata.model_validate(
+                 await apify_datasets_client.get_or_create(),
+             ).id
+             apify_dataset_client = apify_client_async.dataset(dataset_id=id)
+
+         # Verify that the storage exists by fetching its metadata again.
+         metadata = await apify_dataset_client.get()
+         if metadata is None:
+             raise ValueError(f'Opening dataset with id={id} and name={name} failed.')
+
+         return cls(
+             api_client=apify_dataset_client,
+             api_public_base_url=api_public_base_url,
+             lock=asyncio.Lock(),
+         )
+
+     @override
+     async def purge(self) -> None:
+         raise NotImplementedError(
+             'Purging datasets is not supported in the Apify platform. '
+             'Use the `drop` method to delete the dataset instead.'
+         )
+
+     @override
+     async def drop(self) -> None:
+         async with self._lock:
+             await self._api_client.delete()
+
+     @override
+     async def push_data(self, data: list[Any] | dict[str, Any]) -> None:
+         async def payloads_generator() -> AsyncIterator[str]:
+             for index, item in enumerate(data):
+                 yield await self._check_and_serialize(item, index)
+
+         async with self._lock:
+             # Handle lists.
+             if isinstance(data, list):
+                 # Invoke the client serially to preserve the order of data.
+                 async for items in self._chunk_by_size(payloads_generator()):
+                     await self._api_client.push_items(items=items)
+
+             # Handle singular items.
+             else:
+                 items = await self._check_and_serialize(data)
+                 await self._api_client.push_items(items=items)
+
+     @override
+     async def get_data(
+         self,
+         *,
+         offset: int = 0,
+         limit: int | None = 999_999_999_999,
+         clean: bool = False,
+         desc: bool = False,
+         fields: list[str] | None = None,
+         omit: list[str] | None = None,
+         unwind: list[str] | None = None,
+         skip_empty: bool = False,
+         skip_hidden: bool = False,
+         flatten: list[str] | None = None,
+         view: str | None = None,
+     ) -> DatasetItemsListPage:
+         response = await self._api_client.list_items(
+             offset=offset,
+             limit=limit,
+             clean=clean,
+             desc=desc,
+             fields=fields,
+             omit=omit,
+             unwind=unwind,
+             skip_empty=skip_empty,
+             skip_hidden=skip_hidden,
+             flatten=flatten,
+             view=view,
+         )
+         return DatasetItemsListPage.model_validate(vars(response))
+
+     @override
+     async def iterate_items(
+         self,
+         *,
+         offset: int = 0,
+         limit: int | None = None,
+         clean: bool = False,
+         desc: bool = False,
+         fields: list[str] | None = None,
+         omit: list[str] | None = None,
+         unwind: list[str] | None = None,
+         skip_empty: bool = False,
+         skip_hidden: bool = False,
+     ) -> AsyncIterator[dict]:
+         async for item in self._api_client.iterate_items(
+             offset=offset,
+             limit=limit,
+             clean=clean,
+             desc=desc,
+             fields=fields,
+             omit=omit,
+             unwind=unwind,
+             skip_empty=skip_empty,
+             skip_hidden=skip_hidden,
+         ):
+             yield item
+
+     @classmethod
+     async def _check_and_serialize(cls, item: JsonSerializable, index: int | None = None) -> str:
+         """Serialize a given item to JSON, checking its serializability and size against the limit.
+
+         Args:
+             item: The item to serialize.
+             index: Index of the item, used for error context.
+
+         Returns:
+             Serialized JSON string.
+
+         Raises:
+             ValueError: If the item is not JSON serializable or exceeds the size limit.
+         """
+         s = ' ' if index is None else f' at index {index} '
+
+         try:
+             payload = await json_dumps(item)
+         except Exception as exc:
+             raise ValueError(f'Data item{s}is not serializable to JSON.') from exc
+
+         payload_size = ByteSize(len(payload.encode('utf-8')))
+         if payload_size > cls._EFFECTIVE_LIMIT_SIZE:
+             raise ValueError(f'Data item{s}is too large (size: {payload_size}, limit: {cls._EFFECTIVE_LIMIT_SIZE})')
+
+         return payload
+
+     async def _chunk_by_size(self, items: AsyncIterator[str]) -> AsyncIterator[str]:
+         """Yield chunks of JSON arrays composed of input strings, respecting a size limit.
+
+         Groups an iterable of JSON string payloads into larger JSON arrays, ensuring the total size
+         of each array does not exceed `_EFFECTIVE_LIMIT_SIZE`. Each output is a JSON array string that
+         contains as many payloads as possible without breaching the size threshold, maintaining the
+         order of the original payloads. Assumes individual items are below the size limit.
+
+         Args:
+             items: Iterable of JSON string payloads.
+
+         Yields:
+             Strings representing JSON arrays of payloads, each staying within the size limit.
+         """
+         last_chunk_size = ByteSize(2)  # Add 2 bytes for the [] wrapper.
+         current_chunk = []
+
+         async for payload in items:
+             payload_size = ByteSize(len(payload.encode('utf-8')))
+
+             if last_chunk_size + payload_size <= self._EFFECTIVE_LIMIT_SIZE:
+                 current_chunk.append(payload)
+                 last_chunk_size += payload_size + ByteSize(1)  # Add 1 byte for the ',' separator.
+             else:
+                 yield f'[{",".join(current_chunk)}]'
+                 current_chunk = [payload]
+                 last_chunk_size = payload_size + ByteSize(2)  # Add 2 bytes for the [] wrapper.
+
+         yield f'[{",".join(current_chunk)}]'
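
For orientation, here is a minimal usage sketch of the client above. It assumes `ApifyDatasetClient` is exported from the new `storage_clients._apify` package (per the `__init__.py` added in this diff) and that the global `Configuration` carries a valid `token`, `api_base_url`, and `api_public_base_url`, as it does on the Apify platform; none of this is confirmed by the diff itself.

import asyncio

from apify import Configuration
from apify.storage_clients._apify import ApifyDatasetClient  # Assumed export.


async def main() -> None:
    # On the platform, the token and API URLs come from environment variables.
    config = Configuration.get_global_configuration()

    # Open (or create) a named dataset; `open` validates the configuration,
    # resolves the storage by name, and verifies it via its metadata.
    dataset = await ApifyDatasetClient.open(id=None, name='my-results', configuration=config)

    # List items are serialized individually and grouped by _chunk_by_size into
    # JSON-array payloads of at most ~9 MB (minus the 0.01% safety buffer).
    await dataset.push_data([{'url': 'https://example.com', 'status': 200}])


asyncio.run(main())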
apify/storage_clients/_apify/_key_value_store_client.py
@@ -0,0 +1,241 @@
+ from __future__ import annotations
+
+ import asyncio
+ from logging import getLogger
+ from typing import TYPE_CHECKING, Any
+
+ from typing_extensions import override
+ from yarl import URL
+
+ from apify_client import ApifyClientAsync
+ from crawlee.storage_clients._base import KeyValueStoreClient
+ from crawlee.storage_clients.models import KeyValueStoreRecord, KeyValueStoreRecordMetadata
+
+ from ._models import ApifyKeyValueStoreMetadata, KeyValueStoreListKeysPage
+ from apify._crypto import create_hmac_signature
+
+ if TYPE_CHECKING:
+     from collections.abc import AsyncIterator
+
+     from apify_client.clients import KeyValueStoreClientAsync
+
+     from apify import Configuration
+
+ logger = getLogger(__name__)
+
+
+ class ApifyKeyValueStoreClient(KeyValueStoreClient):
+     """An Apify platform implementation of the key-value store client."""
+
+     def __init__(
+         self,
+         *,
+         api_client: KeyValueStoreClientAsync,
+         api_public_base_url: str,
+         lock: asyncio.Lock,
+     ) -> None:
+         """Initialize a new instance.
+
+         Preferably use the `ApifyKeyValueStoreClient.open` class method to create a new instance.
+         """
+         self._api_client = api_client
+         """The Apify KVS client for API operations."""
+
+         self._api_public_base_url = api_public_base_url
+         """The public base URL for accessing the key-value store records."""
+
+         self._lock = lock
+         """A lock to ensure that only one operation is performed at a time."""
+
+     @override
+     async def get_metadata(self) -> ApifyKeyValueStoreMetadata:
+         metadata = await self._api_client.get()
+         return ApifyKeyValueStoreMetadata.model_validate(metadata)
+
+     @classmethod
+     async def open(
+         cls,
+         *,
+         id: str | None,
+         name: str | None,
+         configuration: Configuration,
+     ) -> ApifyKeyValueStoreClient:
+         """Open an Apify key-value store client.
+
+         This method creates and initializes a new instance of the Apify key-value store client.
+         It handles authentication, storage lookup/creation, and metadata retrieval.
+
+         Args:
+             id: The ID of an existing key-value store to open. If provided, the client will connect to this
+                 specific storage. Cannot be used together with `name`.
+             name: The name of a key-value store to get or create. If a storage with this name exists, it will be
+                 opened; otherwise, a new one will be created. Cannot be used together with `id`.
+             configuration: The configuration object containing API credentials and settings. Must include a valid
+                 `token` and `api_base_url`. May also contain a `default_key_value_store_id` for fallback when
+                 neither `id` nor `name` is provided.
+
+         Returns:
+             An instance for the opened or created storage client.
+
+         Raises:
+             ValueError: If the configuration is missing required fields (token, api_base_url), if both `id` and
+                 `name` are provided, or if neither `id` nor `name` is provided and no default storage ID is
+                 available in the configuration.
+         """
+         token = configuration.token
+         if not token:
+             raise ValueError(f'Apify storage client requires a valid token in Configuration (token={token}).')
+
+         api_url = configuration.api_base_url
+         if not api_url:
+             raise ValueError(f'Apify storage client requires a valid API URL in Configuration (api_url={api_url}).')
+
+         api_public_base_url = configuration.api_public_base_url
+         if not api_public_base_url:
+             raise ValueError(
+                 'Apify storage client requires a valid API public base URL in Configuration '
+                 f'(api_public_base_url={api_public_base_url}).'
+             )
+
+         # Create an Apify client with the provided token and API URL.
+         apify_client_async = ApifyClientAsync(
+             token=token,
+             api_url=api_url,
+             max_retries=8,
+             min_delay_between_retries_millis=500,
+             timeout_secs=360,
+         )
+         apify_kvss_client = apify_client_async.key_value_stores()
+
+         # If both id and name are provided, raise an error.
+         if id and name:
+             raise ValueError('Only one of "id" or "name" can be specified, not both.')
+
+         # If id is provided, get the storage by ID.
+         if id and name is None:
+             apify_kvs_client = apify_client_async.key_value_store(key_value_store_id=id)
+
+         # If name is provided, get or create the storage by name.
+         if name and id is None:
+             id = ApifyKeyValueStoreMetadata.model_validate(
+                 await apify_kvss_client.get_or_create(name=name),
+             ).id
+             apify_kvs_client = apify_client_async.key_value_store(key_value_store_id=id)
+
+         # If both id and name are None, try to get the default storage ID from environment variables.
+         # The default storage ID environment variable is set by the Apify platform. It also contains
+         # a new storage ID after an Actor reboot or migration.
+         if id is None and name is None:
+             id = configuration.default_key_value_store_id
+             apify_kvs_client = apify_client_async.key_value_store(key_value_store_id=id)
+
+         # Fetch its metadata.
+         metadata = await apify_kvs_client.get()
+
+         # If metadata is None, the storage does not exist, so create it.
+         if metadata is None:
+             id = ApifyKeyValueStoreMetadata.model_validate(
+                 await apify_kvss_client.get_or_create(),
+             ).id
+             apify_kvs_client = apify_client_async.key_value_store(key_value_store_id=id)
+
+         # Verify that the storage exists by fetching its metadata again.
+         metadata = await apify_kvs_client.get()
+         if metadata is None:
+             raise ValueError(f'Opening key-value store with id={id} and name={name} failed.')
+
+         return cls(
+             api_client=apify_kvs_client,
+             api_public_base_url=api_public_base_url,
+             lock=asyncio.Lock(),
+         )
+
+     @override
+     async def purge(self) -> None:
+         raise NotImplementedError(
+             'Purging key-value stores is not supported in the Apify platform. '
+             'Use the `drop` method to delete the key-value store instead.'
+         )
+
+     @override
+     async def drop(self) -> None:
+         async with self._lock:
+             await self._api_client.delete()
+
+     @override
+     async def get_value(self, key: str) -> KeyValueStoreRecord | None:
+         response = await self._api_client.get_record(key)
+         return KeyValueStoreRecord.model_validate(response) if response else None
+
+     @override
+     async def set_value(self, key: str, value: Any, content_type: str | None = None) -> None:
+         async with self._lock:
+             await self._api_client.set_record(
+                 key=key,
+                 value=value,
+                 content_type=content_type,
+             )
+
+     @override
+     async def delete_value(self, key: str) -> None:
+         async with self._lock:
+             await self._api_client.delete_record(key=key)
+
+     @override
+     async def iterate_keys(
+         self,
+         *,
+         exclusive_start_key: str | None = None,
+         limit: int | None = None,
+     ) -> AsyncIterator[KeyValueStoreRecordMetadata]:
+         count = 0
+
+         while True:
+             response = await self._api_client.list_keys(exclusive_start_key=exclusive_start_key)
+             list_key_page = KeyValueStoreListKeysPage.model_validate(response)
+
+             for item in list_key_page.items:
+                 # Convert KeyValueStoreKeyInfo to KeyValueStoreRecordMetadata.
+                 record_metadata = KeyValueStoreRecordMetadata(
+                     key=item.key,
+                     size=item.size,
+                     content_type='application/octet-stream',  # Content type is not available from list_keys.
+                 )
+                 yield record_metadata
+                 count += 1
+
+                 # If we've reached the limit, stop yielding.
+                 if limit and count >= limit:
+                     break
+
+             # If we've reached the limit or there are no more pages, exit the loop.
+             if (limit and count >= limit) or not list_key_page.is_truncated:
+                 break
+
+             exclusive_start_key = list_key_page.next_exclusive_start_key
+
+     @override
+     async def record_exists(self, key: str) -> bool:
+         return await self._api_client.record_exists(key=key)
+
+     async def get_public_url(self, key: str) -> str:
+         """Get a URL for the given key that may be used to publicly access the value in the remote key-value store.
+
+         Args:
+             key: The key for which the URL should be generated.
+
+         Returns:
+             A public URL that can be used to access the value of the given key in the KVS.
+         """
+         if self._api_client.resource_id is None:
+             raise ValueError('resource_id cannot be None when generating a public URL')
+
+         public_url = (
+             URL(self._api_public_base_url) / 'v2' / 'key-value-stores' / self._api_client.resource_id / 'records' / key
+         )
+         metadata = await self.get_metadata()
+
+         if metadata.url_signing_secret_key is not None:
+             public_url = public_url.with_query(signature=create_hmac_signature(metadata.url_signing_secret_key, key))
+
+         return str(public_url)
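
A matching sketch for the key-value store client, under the same assumptions about exports and configuration as the dataset example above:

import asyncio

from apify import Configuration
from apify.storage_clients._apify import ApifyKeyValueStoreClient  # Assumed export.


async def main() -> None:
    config = Configuration.get_global_configuration()
    kvs = await ApifyKeyValueStoreClient.open(id=None, name='my-store', configuration=config)

    await kvs.set_value('OUTPUT', {'hello': 'world'}, content_type='application/json')

    # Resolves to <api_public_base_url>/v2/key-value-stores/<store-id>/records/OUTPUT,
    # with a `signature` query parameter appended when the store has a signing key.
    print(await kvs.get_public_url('OUTPUT'))


asyncio.run(main())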
apify/storage_clients/_apify/_models.py
@@ -0,0 +1,107 @@
+ from __future__ import annotations
+
+ from datetime import datetime, timedelta
+ from typing import Annotated
+
+ from pydantic import BaseModel, ConfigDict, Field
+
+ from crawlee.storage_clients.models import KeyValueStoreMetadata
+
+ from apify import Request
+ from apify._utils import docs_group
+
+
+ @docs_group('Storage data')
+ class ApifyKeyValueStoreMetadata(KeyValueStoreMetadata):
+     """Extended key-value store metadata model for the Apify platform.
+
+     Includes additional Apify-specific fields.
+     """
+
+     url_signing_secret_key: Annotated[str | None, Field(alias='urlSigningSecretKey', default=None)]
+     """The secret key used for signing URLs for secure access to key-value store records."""
+
+
+ @docs_group('Storage data')
+ class ProlongRequestLockResponse(BaseModel):
+     """Response to prolong request lock calls."""
+
+     model_config = ConfigDict(populate_by_name=True)
+
+     lock_expires_at: Annotated[datetime, Field(alias='lockExpiresAt')]
+
+
+ @docs_group('Storage data')
+ class RequestQueueHead(BaseModel):
+     """Model for the request queue head.
+
+     Represents a collection of requests retrieved from the beginning of a queue,
+     including metadata about the queue's state and lock information for the requests.
+     """
+
+     model_config = ConfigDict(populate_by_name=True)
+
+     limit: Annotated[int | None, Field(alias='limit', default=None)]
+     """The maximum number of requests that were requested from the queue."""
+
+     had_multiple_clients: Annotated[bool, Field(alias='hadMultipleClients', default=False)]
+     """Indicates whether the queue has been accessed by multiple clients (consumers)."""
+
+     queue_modified_at: Annotated[datetime, Field(alias='queueModifiedAt')]
+     """The timestamp when the queue was last modified."""
+
+     lock_time: Annotated[timedelta | None, Field(alias='lockSecs', default=None)]
+     """The duration for which the returned requests are locked and cannot be processed by other clients."""
+
+     queue_has_locked_requests: Annotated[bool | None, Field(alias='queueHasLockedRequests', default=False)]
+     """Indicates whether the queue contains any locked requests."""
+
+     items: Annotated[list[Request], Field(alias='items', default_factory=list[Request])]
+     """The list of request objects retrieved from the beginning of the queue."""
+
+
+ class KeyValueStoreKeyInfo(BaseModel):
+     """Model for a key-value store key info.
+
+     Internal structure only.
+     """
+
+     model_config = ConfigDict(populate_by_name=True)
+
+     key: Annotated[str, Field(alias='key')]
+     size: Annotated[int, Field(alias='size')]
+
+
+ class KeyValueStoreListKeysPage(BaseModel):
+     """Model for listing keys in the key-value store.
+
+     Internal structure only.
+     """
+
+     model_config = ConfigDict(populate_by_name=True)
+
+     count: Annotated[int, Field(alias='count')]
+     limit: Annotated[int, Field(alias='limit')]
+     is_truncated: Annotated[bool, Field(alias='isTruncated')]
+     items: Annotated[list[KeyValueStoreKeyInfo], Field(alias='items', default_factory=list)]
+     exclusive_start_key: Annotated[str | None, Field(alias='exclusiveStartKey', default=None)]
+     next_exclusive_start_key: Annotated[str | None, Field(alias='nextExclusiveStartKey', default=None)]
+
+
+ class CachedRequest(BaseModel):
+     """Pydantic model for cached request information.
+
+     Internal structure only.
+     """
+
+     unique_key: str
+     """Unique key of the request."""
+
+     was_already_handled: bool
+     """Whether the request was already handled."""
+
+     hydrated: Request | None = None
+     """The hydrated request object (the original one)."""
+
+     lock_expires_at: datetime | None = None
+     """The expiration time of the lock on the request."""