apify 2.7.1b18__py3-none-any.whl → 2.7.1b20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of apify might be problematic. Click here for more details.
- apify/_actor.py +0 -3
- apify/_configuration.py +6 -6
- apify/storage_clients/_apify/_dataset_client.py +9 -2
- apify/storage_clients/_apify/_key_value_store_client.py +9 -2
- apify/storage_clients/_apify/_models.py +25 -1
- apify/storage_clients/_apify/_request_queue_client.py +19 -5
- apify/storage_clients/_apify/_utils.py +3 -2
- {apify-2.7.1b18.dist-info → apify-2.7.1b20.dist-info}/METADATA +1 -1
- {apify-2.7.1b18.dist-info → apify-2.7.1b20.dist-info}/RECORD +11 -11
- {apify-2.7.1b18.dist-info → apify-2.7.1b20.dist-info}/WHEEL +0 -0
- {apify-2.7.1b18.dist-info → apify-2.7.1b20.dist-info}/licenses/LICENSE +0 -0
apify/_actor.py
CHANGED
|
@@ -324,9 +324,6 @@ class _ActorType:
|
|
|
324
324
|
self.log.info('Initializing Actor...')
|
|
325
325
|
self.log.info('System info', extra=get_system_info())
|
|
326
326
|
|
|
327
|
-
# TODO: Print outdated SDK version warning (we need a new env var for this)
|
|
328
|
-
# https://github.com/apify/apify-sdk-python/issues/146
|
|
329
|
-
|
|
330
327
|
await self.event_manager.__aenter__()
|
|
331
328
|
self.log.debug('Event manager initialized')
|
|
332
329
|
|
apify/_configuration.py
CHANGED
|
@@ -142,7 +142,7 @@ class Configuration(CrawleeConfiguration):
|
|
|
142
142
|
] = None
|
|
143
143
|
|
|
144
144
|
default_dataset_id: Annotated[
|
|
145
|
-
str,
|
|
145
|
+
str | None,
|
|
146
146
|
Field(
|
|
147
147
|
validation_alias=AliasChoices(
|
|
148
148
|
'actor_default_dataset_id',
|
|
@@ -150,10 +150,10 @@ class Configuration(CrawleeConfiguration):
|
|
|
150
150
|
),
|
|
151
151
|
description='Default dataset ID used by the Apify storage client when no ID or name is provided.',
|
|
152
152
|
),
|
|
153
|
-
] =
|
|
153
|
+
] = None
|
|
154
154
|
|
|
155
155
|
default_key_value_store_id: Annotated[
|
|
156
|
-
str,
|
|
156
|
+
str | None,
|
|
157
157
|
Field(
|
|
158
158
|
validation_alias=AliasChoices(
|
|
159
159
|
'actor_default_key_value_store_id',
|
|
@@ -161,10 +161,10 @@ class Configuration(CrawleeConfiguration):
|
|
|
161
161
|
),
|
|
162
162
|
description='Default key-value store ID for the Apify storage client when no ID or name is provided.',
|
|
163
163
|
),
|
|
164
|
-
] =
|
|
164
|
+
] = None
|
|
165
165
|
|
|
166
166
|
default_request_queue_id: Annotated[
|
|
167
|
-
str,
|
|
167
|
+
str | None,
|
|
168
168
|
Field(
|
|
169
169
|
validation_alias=AliasChoices(
|
|
170
170
|
'actor_default_request_queue_id',
|
|
@@ -172,7 +172,7 @@ class Configuration(CrawleeConfiguration):
|
|
|
172
172
|
),
|
|
173
173
|
description='Default request queue ID for the Apify storage client when no ID or name is provided.',
|
|
174
174
|
),
|
|
175
|
-
] =
|
|
175
|
+
] = None
|
|
176
176
|
|
|
177
177
|
disable_outdated_warning: Annotated[
|
|
178
178
|
bool,
|
|
@@ -124,8 +124,10 @@ class ApifyDatasetClient(DatasetClient):
|
|
|
124
124
|
)
|
|
125
125
|
apify_datasets_client = apify_client_async.datasets()
|
|
126
126
|
|
|
127
|
-
# Normalize
|
|
128
|
-
|
|
127
|
+
# Normalize unnamed default storage in cases where not defined in `configuration.default_dataset_id` to unnamed
|
|
128
|
+
# storage aliased as `__default__`
|
|
129
|
+
if not any([alias, name, id, configuration.default_dataset_id]):
|
|
130
|
+
alias = '__default__'
|
|
129
131
|
|
|
130
132
|
if alias:
|
|
131
133
|
# Check if there is pre-existing alias mapping in the default KVS.
|
|
@@ -150,6 +152,11 @@ class ApifyDatasetClient(DatasetClient):
|
|
|
150
152
|
# If none are provided, try to get the default storage ID from environment variables.
|
|
151
153
|
elif id is None:
|
|
152
154
|
id = configuration.default_dataset_id
|
|
155
|
+
if not id:
|
|
156
|
+
raise ValueError(
|
|
157
|
+
'Dataset "id", "name", or "alias" must be specified, '
|
|
158
|
+
'or a default dataset ID must be set in the configuration.'
|
|
159
|
+
)
|
|
153
160
|
|
|
154
161
|
# Now create the client for the determined ID
|
|
155
162
|
apify_dataset_client = apify_client_async.dataset(dataset_id=id)
|
|
@@ -115,8 +115,10 @@ class ApifyKeyValueStoreClient(KeyValueStoreClient):
|
|
|
115
115
|
)
|
|
116
116
|
apify_kvss_client = apify_client_async.key_value_stores()
|
|
117
117
|
|
|
118
|
-
# Normalize
|
|
119
|
-
|
|
118
|
+
# Normalize unnamed default storage in cases where not defined in `configuration.default_key_value_store_id` to
|
|
119
|
+
# unnamed storage aliased as `__default__`
|
|
120
|
+
if not any([alias, name, id, configuration.default_key_value_store_id]):
|
|
121
|
+
alias = '__default__'
|
|
120
122
|
|
|
121
123
|
if alias:
|
|
122
124
|
# Check if there is pre-existing alias mapping in the default KVS.
|
|
@@ -142,6 +144,11 @@ class ApifyKeyValueStoreClient(KeyValueStoreClient):
|
|
|
142
144
|
# If none are provided, try to get the default storage ID from environment variables.
|
|
143
145
|
elif id is None:
|
|
144
146
|
id = configuration.default_key_value_store_id
|
|
147
|
+
if not id:
|
|
148
|
+
raise ValueError(
|
|
149
|
+
'KeyValueStore "id", "name", or "alias" must be specified, '
|
|
150
|
+
'or a default KeyValueStore ID must be set in the configuration.'
|
|
151
|
+
)
|
|
145
152
|
|
|
146
153
|
# Now create the client for the determined ID
|
|
147
154
|
apify_kvs_client = apify_client_async.key_value_store(key_value_store_id=id)
|
|
@@ -5,7 +5,7 @@ from typing import Annotated
|
|
|
5
5
|
|
|
6
6
|
from pydantic import BaseModel, ConfigDict, Field
|
|
7
7
|
|
|
8
|
-
from crawlee.storage_clients.models import KeyValueStoreMetadata
|
|
8
|
+
from crawlee.storage_clients.models import KeyValueStoreMetadata, RequestQueueMetadata
|
|
9
9
|
|
|
10
10
|
from apify import Request
|
|
11
11
|
from apify._utils import docs_group
|
|
@@ -105,3 +105,27 @@ class CachedRequest(BaseModel):
|
|
|
105
105
|
|
|
106
106
|
lock_expires_at: datetime | None = None
|
|
107
107
|
"""The expiration time of the lock on the request."""
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
class RequestQueueStats(BaseModel):
|
|
111
|
+
model_config = ConfigDict(populate_by_name=True)
|
|
112
|
+
|
|
113
|
+
delete_count: Annotated[int, Field(alias='deleteCount', default=0)]
|
|
114
|
+
"""The number of request queue deletes."""
|
|
115
|
+
|
|
116
|
+
head_item_read_count: Annotated[int, Field(alias='headItemReadCount', default=0)]
|
|
117
|
+
"""The number of request queue head reads."""
|
|
118
|
+
|
|
119
|
+
read_count: Annotated[int, Field(alias='readCount', default=0)]
|
|
120
|
+
"""The number of request queue reads."""
|
|
121
|
+
|
|
122
|
+
storage_bytes: Annotated[int, Field(alias='storageBytes', default=0)]
|
|
123
|
+
"""Storage size in Bytes."""
|
|
124
|
+
|
|
125
|
+
write_count: Annotated[int, Field(alias='writeCount', default=0)]
|
|
126
|
+
"""The number of request queue writes."""
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
class ApifyRequestQueueMetadata(RequestQueueMetadata):
|
|
130
|
+
stats: Annotated[RequestQueueStats, Field(alias='stats', default_factory=RequestQueueStats)]
|
|
131
|
+
"""Additional statistics about the request queue."""
|
|
@@ -18,7 +18,13 @@ from crawlee.storage_clients._base import RequestQueueClient
|
|
|
18
18
|
from crawlee.storage_clients.models import AddRequestsResponse, ProcessedRequest, RequestQueueMetadata
|
|
19
19
|
from crawlee.storages import RequestQueue
|
|
20
20
|
|
|
21
|
-
from ._models import
|
|
21
|
+
from ._models import (
|
|
22
|
+
ApifyRequestQueueMetadata,
|
|
23
|
+
CachedRequest,
|
|
24
|
+
ProlongRequestLockResponse,
|
|
25
|
+
RequestQueueHead,
|
|
26
|
+
RequestQueueStats,
|
|
27
|
+
)
|
|
22
28
|
from ._utils import AliasResolver
|
|
23
29
|
from apify import Request
|
|
24
30
|
|
|
@@ -108,7 +114,7 @@ class ApifyRequestQueueClient(RequestQueueClient):
|
|
|
108
114
|
return self._metadata
|
|
109
115
|
|
|
110
116
|
@override
|
|
111
|
-
async def get_metadata(self) ->
|
|
117
|
+
async def get_metadata(self) -> ApifyRequestQueueMetadata:
|
|
112
118
|
"""Get metadata about the request queue.
|
|
113
119
|
|
|
114
120
|
Returns:
|
|
@@ -119,7 +125,7 @@ class ApifyRequestQueueClient(RequestQueueClient):
|
|
|
119
125
|
if response is None:
|
|
120
126
|
raise ValueError('Failed to fetch request queue metadata from the API.')
|
|
121
127
|
# Enhance the API response with local estimates (the API can lag by a few seconds, while local estimates do not).
|
|
122
|
-
return
|
|
128
|
+
return ApifyRequestQueueMetadata(
|
|
123
129
|
id=response['id'],
|
|
124
130
|
name=response['name'],
|
|
125
131
|
total_request_count=max(response['totalRequestCount'], self._metadata.total_request_count),
|
|
@@ -129,6 +135,7 @@ class ApifyRequestQueueClient(RequestQueueClient):
|
|
|
129
135
|
modified_at=max(response['modifiedAt'], self._metadata.modified_at),
|
|
130
136
|
accessed_at=max(response['accessedAt'], self._metadata.accessed_at),
|
|
131
137
|
had_multiple_clients=response['hadMultipleClients'] or self._metadata.had_multiple_clients,
|
|
138
|
+
stats=RequestQueueStats.model_validate(response['stats'], by_alias=True),
|
|
132
139
|
)
|
|
133
140
|
|
|
134
141
|
@classmethod
|
|
@@ -193,8 +200,10 @@ class ApifyRequestQueueClient(RequestQueueClient):
|
|
|
193
200
|
)
|
|
194
201
|
apify_rqs_client = apify_client_async.request_queues()
|
|
195
202
|
|
|
196
|
-
# Normalize
|
|
197
|
-
|
|
203
|
+
# Normalize unnamed default storage in cases where not defined in `configuration.default_request_queue_id` to
|
|
204
|
+
# unnamed storage aliased as `__default__`
|
|
205
|
+
if not any([alias, name, id, configuration.default_request_queue_id]):
|
|
206
|
+
alias = '__default__'
|
|
198
207
|
|
|
199
208
|
if alias:
|
|
200
209
|
# Check if there is pre-existing alias mapping in the default KVS.
|
|
@@ -219,6 +228,11 @@ class ApifyRequestQueueClient(RequestQueueClient):
|
|
|
219
228
|
# If none are provided, try to get the default storage ID from environment variables.
|
|
220
229
|
elif id is None:
|
|
221
230
|
id = configuration.default_request_queue_id
|
|
231
|
+
if not id:
|
|
232
|
+
raise ValueError(
|
|
233
|
+
'RequestQueue "id", "name", or "alias" must be specified, '
|
|
234
|
+
'or a default default_request_queue_id ID must be set in the configuration.'
|
|
235
|
+
)
|
|
222
236
|
|
|
223
237
|
# Use suitable client_key to make `hadMultipleClients` response of Apify API useful.
|
|
224
238
|
# It should persist across migrated or resurrected Actor runs on the Apify platform.
|
|
@@ -76,7 +76,7 @@ class AliasResolver:
|
|
|
76
76
|
Returns:
|
|
77
77
|
Map of aliases and storage ids.
|
|
78
78
|
"""
|
|
79
|
-
if not cls._alias_map:
|
|
79
|
+
if not cls._alias_map and Configuration.get_global_configuration().is_at_home:
|
|
80
80
|
default_kvs_client = await _get_default_kvs_client()
|
|
81
81
|
|
|
82
82
|
record = await default_kvs_client.get_record(cls._ALIAS_MAPPING_KEY)
|
|
@@ -156,7 +156,8 @@ async def _get_default_kvs_client() -> KeyValueStoreClientAsync:
|
|
|
156
156
|
min_delay_between_retries_millis=500,
|
|
157
157
|
timeout_secs=360,
|
|
158
158
|
)
|
|
159
|
-
|
|
159
|
+
if not configuration.default_key_value_store_id:
|
|
160
|
+
raise ValueError("'Configuration.default_key_value_store_id' must be set.")
|
|
160
161
|
return apify_client_async.key_value_store(key_value_store_id=configuration.default_key_value_store_id)
|
|
161
162
|
|
|
162
163
|
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
apify/__init__.py,sha256=HpgKg2FZWJuSPfDygzJ62psylhw4NN4tKFnoYUIhcd4,838
|
|
2
|
-
apify/_actor.py,sha256=
|
|
2
|
+
apify/_actor.py,sha256=FAN-ldqGaCztCehY7yNcEu3-jDr1ZuT0P0lyMcEDLNg,55960
|
|
3
3
|
apify/_charging.py,sha256=KjZ2DnEMS0Tt8ibizmmt0RwBq8FOAsD1z-hKFgdazcY,13143
|
|
4
|
-
apify/_configuration.py,sha256=
|
|
4
|
+
apify/_configuration.py,sha256=gq_UfWTgcP1_0kEMLhXVg33SgSxXjShbuzoXyCFfK0w,14682
|
|
5
5
|
apify/_consts.py,sha256=CjhyEJ4Mi0lcIrzfqz8dN7nPJWGjCeBrrXQy1PZ6zRI,440
|
|
6
6
|
apify/_crypto.py,sha256=tqUs13QkemDtGzvU41pIA2HUEawpDlgzqbwKjm4I8kM,6852
|
|
7
7
|
apify/_models.py,sha256=EzU-inWeJ7T5HNVYEwnYb79W-q4OAPhtrYctfRYzpTE,7848
|
|
@@ -35,19 +35,19 @@ apify/scrapy/pipelines/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSu
|
|
|
35
35
|
apify/storage_clients/__init__.py,sha256=9WLAKs2GnnP0yyKR0mc3AfJ1IqXF48V3KPMp6KaB8kU,277
|
|
36
36
|
apify/storage_clients/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
37
37
|
apify/storage_clients/_apify/__init__.py,sha256=mtbVDsxqWL3kx30elnh0kAn2kZ4s3BBsWa15Y5e7RMU,347
|
|
38
|
-
apify/storage_clients/_apify/_dataset_client.py,sha256=
|
|
39
|
-
apify/storage_clients/_apify/_key_value_store_client.py,sha256=
|
|
40
|
-
apify/storage_clients/_apify/_models.py,sha256=
|
|
41
|
-
apify/storage_clients/_apify/_request_queue_client.py,sha256=
|
|
38
|
+
apify/storage_clients/_apify/_dataset_client.py,sha256=Bb3UwOaFkyuEY7tuBf8K46R4ZP_b1EaAkDOXOqwSoW8,12498
|
|
39
|
+
apify/storage_clients/_apify/_key_value_store_client.py,sha256=42dARbLX2oeOW7uYYKkDyQbEriMuh55Mxh0SqvkOEGg,10529
|
|
40
|
+
apify/storage_clients/_apify/_models.py,sha256=GEaN7Got1zIg42QPH36obHRWRDVNtzOkRuOWYRf9bFU,4572
|
|
41
|
+
apify/storage_clients/_apify/_request_queue_client.py,sha256=xahuxmEOjms1fsWJk8vp5nBP5rG1WdYisfWjJidGoUQ,33243
|
|
42
42
|
apify/storage_clients/_apify/_storage_client.py,sha256=7oqn8-7zG7_cruw6jzmRl2htX2rOt-KPTzCRVNCcyTA,3304
|
|
43
|
-
apify/storage_clients/_apify/_utils.py,sha256=
|
|
43
|
+
apify/storage_clients/_apify/_utils.py,sha256=dRU-NPoQe7PI2FC_IRkisiggQUzo-w6q-u2LgcA5-Dc,6801
|
|
44
44
|
apify/storage_clients/_apify/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
45
45
|
apify/storage_clients/_file_system/__init__.py,sha256=rDbXatXV9wHKPhKTrXDzWnexhTm7sIJQWucMi-P-SD4,130
|
|
46
46
|
apify/storage_clients/_file_system/_key_value_store_client.py,sha256=fnSJ1EIOPCGfcE6e5S3Tux9VbnMVLCJjugkaQoH_9yo,2267
|
|
47
47
|
apify/storage_clients/_file_system/_storage_client.py,sha256=rcwpKYlrWzvlSA2xoxftg-EZAi_iGZ3vOCbu0C5lKDE,1396
|
|
48
48
|
apify/storages/__init__.py,sha256=-9tEYJVabVs_eRVhUehxN58GH0UG8OfuGjGwuDieP2M,122
|
|
49
49
|
apify/storages/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
50
|
-
apify-2.7.
|
|
51
|
-
apify-2.7.
|
|
52
|
-
apify-2.7.
|
|
53
|
-
apify-2.7.
|
|
50
|
+
apify-2.7.1b20.dist-info/METADATA,sha256=IKZkZKJ2lWf67ql48sAmvHqIXMxuuiz4w3v6H9adoSU,22580
|
|
51
|
+
apify-2.7.1b20.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
52
|
+
apify-2.7.1b20.dist-info/licenses/LICENSE,sha256=AsFjHssKjj4LGd2ZCqXn6FBzMqcWdjQre1byPPSypVw,11355
|
|
53
|
+
apify-2.7.1b20.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|