apify 3.0.2b6__py3-none-any.whl → 3.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- apify/request_loaders/_apify_request_list.py +2 -2
- apify/storage_clients/_apify/_dataset_client.py +12 -29
- apify/storage_clients/_apify/_key_value_store_client.py +13 -43
- apify/storage_clients/_apify/_models.py +2 -2
- apify/storage_clients/_apify/_request_queue_client.py +7 -30
- apify/storage_clients/_apify/_request_queue_shared_client.py +54 -48
- apify/storage_clients/_apify/_request_queue_single_client.py +86 -67
- apify/storage_clients/_apify/_utils.py +27 -0
- {apify-3.0.2b6.dist-info → apify-3.0.3.dist-info}/METADATA +2 -2
- {apify-3.0.2b6.dist-info → apify-3.0.3.dist-info}/RECORD +12 -12
- {apify-3.0.2b6.dist-info → apify-3.0.3.dist-info}/WHEEL +0 -0
- {apify-3.0.2b6.dist-info → apify-3.0.3.dist-info}/licenses/LICENSE +0 -0
apify/request_loaders/_apify_request_list.py
@@ -22,8 +22,8 @@ URL_NO_COMMAS_REGEX = re.compile(
 class _RequestDetails(BaseModel):
     method: HttpMethod = 'GET'
     payload: str = ''
-    headers: Annotated[dict[str, str], Field(default_factory=dict)]
-    user_data: Annotated[dict[str, str], Field(default_factory=dict, alias='userData')]
+    headers: Annotated[dict[str, str], Field(default_factory=dict)]
+    user_data: Annotated[dict[str, str], Field(default_factory=dict, alias='userData')]


 class _RequestsFromUrlInput(_RequestDetails):
apify/storage_clients/_apify/_dataset_client.py
@@ -1,19 +1,19 @@
 from __future__ import annotations

 import asyncio
+import warnings
 from logging import getLogger
 from typing import TYPE_CHECKING, Any

 from typing_extensions import override

-from apify_client import ApifyClientAsync
 from crawlee._utils.byte_size import ByteSize
 from crawlee._utils.file import json_dumps
 from crawlee.storage_clients._base import DatasetClient
 from crawlee.storage_clients.models import DatasetItemsListPage, DatasetMetadata
 from crawlee.storages import Dataset

-from ._utils import AliasResolver
+from ._utils import AliasResolver, create_apify_client

 if TYPE_CHECKING:
     from collections.abc import AsyncIterator
@@ -52,12 +52,17 @@ class ApifyDatasetClient(DatasetClient):
         self._api_client = api_client
         """The Apify dataset client for API operations."""

-        self._api_public_base_url = api_public_base_url
-        """The public base URL for accessing the key-value store records."""
-
         self._lock = lock
         """A lock to ensure that only one operation is performed at a time."""

+        if api_public_base_url:
+            # Remove in version 4.0, https://github.com/apify/apify-sdk-python/issues/635
+            warnings.warn(
+                'api_public_base_url argument is deprecated and will be removed in version 4.0.0',
+                DeprecationWarning,
+                stacklevel=2,
+            )
+
     @override
     async def get_metadata(self) -> DatasetMetadata:
         metadata = await self._api_client.get()
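Note: the constructor no longer stores api_public_base_url; passing it now only emits a DeprecationWarning (the key-value store client below gets the same guard). A caller-side sketch of the new behavior; make_client is a hypothetical stand-in for the constructors touched in this release:

import warnings

def make_client(api_public_base_url: str | None = None) -> None:
    # Mirrors the guard added above: warn, do not raise, when the legacy argument is passed.
    if api_public_base_url:
        warnings.warn(
            'api_public_base_url argument is deprecated and will be removed in version 4.0.0',
            DeprecationWarning,
            stacklevel=2,
        )

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter('always')
    make_client(api_public_base_url='https://api.apify.com')
    assert caught and issubclass(caught[0].category, DeprecationWarning)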
@@ -99,29 +104,7 @@ class ApifyDatasetClient(DatasetClient):
         if sum(1 for param in [id, name, alias] if param is not None) > 1:
             raise ValueError('Only one of "id", "name", or "alias" can be specified, not multiple.')

-
-        if not token:
-            raise ValueError(f'Apify storage client requires a valid token in Configuration (token={token}).')
-
-        api_url = configuration.api_base_url
-        if not api_url:
-            raise ValueError(f'Apify storage client requires a valid API URL in Configuration (api_url={api_url}).')
-
-        api_public_base_url = configuration.api_public_base_url
-        if not api_public_base_url:
-            raise ValueError(
-                'Apify storage client requires a valid API public base URL in Configuration '
-                f'(api_public_base_url={api_public_base_url}).'
-            )
-
-        # Create Apify client with the provided token and API URL.
-        apify_client_async = ApifyClientAsync(
-            token=token,
-            api_url=api_url,
-            max_retries=8,
-            min_delay_between_retries_millis=500,
-            timeout_secs=360,
-        )
+        apify_client_async = create_apify_client(configuration)
         apify_datasets_client = apify_client_async.datasets()

         # Normalize unnamed default storage in cases where not defined in `configuration.default_dataset_id` to unnamed
@@ -178,7 +161,7 @@ class ApifyDatasetClient(DatasetClient):

         return cls(
             api_client=apify_dataset_client,
-            api_public_base_url=api_public_base_url,
+            api_public_base_url='',  # Remove in version 4.0, https://github.com/apify/apify-sdk-python/issues/635
             lock=asyncio.Lock(),
         )

apify/storage_clients/_apify/_key_value_store_client.py
@@ -1,20 +1,18 @@
 from __future__ import annotations

 import asyncio
+import warnings
 from logging import getLogger
 from typing import TYPE_CHECKING, Any

 from typing_extensions import override
-from yarl import URL

-from apify_client import ApifyClientAsync
 from crawlee.storage_clients._base import KeyValueStoreClient
 from crawlee.storage_clients.models import KeyValueStoreRecord, KeyValueStoreRecordMetadata
 from crawlee.storages import KeyValueStore

 from ._models import ApifyKeyValueStoreMetadata, KeyValueStoreListKeysPage
-from ._utils import AliasResolver
-from apify._crypto import create_hmac_signature
+from ._utils import AliasResolver, create_apify_client

 if TYPE_CHECKING:
     from collections.abc import AsyncIterator
@@ -43,12 +41,17 @@ class ApifyKeyValueStoreClient(KeyValueStoreClient):
         self._api_client = api_client
         """The Apify KVS client for API operations."""

-        self._api_public_base_url = api_public_base_url
-        """The public base URL for accessing the key-value store records."""
-
         self._lock = lock
         """A lock to ensure that only one operation is performed at a time."""

+        if api_public_base_url:
+            # Remove in version 4.0, https://github.com/apify/apify-sdk-python/issues/635
+            warnings.warn(
+                'api_public_base_url argument is deprecated and will be removed in version 4.0.0',
+                DeprecationWarning,
+                stacklevel=2,
+            )
+
     @override
     async def get_metadata(self) -> ApifyKeyValueStoreMetadata:
         metadata = await self._api_client.get()
@@ -90,29 +93,7 @@ class ApifyKeyValueStoreClient(KeyValueStoreClient):
         if sum(1 for param in [id, name, alias] if param is not None) > 1:
             raise ValueError('Only one of "id", "name", or "alias" can be specified, not multiple.')

-
-        if not token:
-            raise ValueError(f'Apify storage client requires a valid token in Configuration (token={token}).')
-
-        api_url = configuration.api_base_url
-        if not api_url:
-            raise ValueError(f'Apify storage client requires a valid API URL in Configuration (api_url={api_url}).')
-
-        api_public_base_url = configuration.api_public_base_url
-        if not api_public_base_url:
-            raise ValueError(
-                'Apify storage client requires a valid API public base URL in Configuration '
-                f'(api_public_base_url={api_public_base_url}).'
-            )
-
-        # Create Apify client with the provided token and API URL.
-        apify_client_async = ApifyClientAsync(
-            token=token,
-            api_url=api_url,
-            max_retries=8,
-            min_delay_between_retries_millis=500,
-            timeout_secs=360,
-        )
+        apify_client_async = create_apify_client(configuration)
         apify_kvss_client = apify_client_async.key_value_stores()

         # Normalize unnamed default storage in cases where not defined in `configuration.default_key_value_store_id` to
@@ -170,7 +151,7 @@ class ApifyKeyValueStoreClient(KeyValueStoreClient):

         return cls(
             api_client=apify_kvs_client,
-            api_public_base_url=api_public_base_url,
+            api_public_base_url='',  # Remove in version 4.0, https://github.com/apify/apify-sdk-python/issues/635
             lock=asyncio.Lock(),
         )

@@ -251,15 +232,4 @@ class ApifyKeyValueStoreClient(KeyValueStoreClient):
         Returns:
             A public URL that can be used to access the value of the given key in the KVS.
         """
-        if self._api_client.resource_id is None:
-            raise ValueError('resource_id cannot be None when generating a public URL')
-
-        public_url = (
-            URL(self._api_public_base_url) / 'v2' / 'key-value-stores' / self._api_client.resource_id / 'records' / key
-        )
-        metadata = await self.get_metadata()
-
-        if metadata.url_signing_secret_key is not None:
-            public_url = public_url.with_query(signature=create_hmac_signature(metadata.url_signing_secret_key, key))
-
-        return str(public_url)
+        return await self._api_client.get_record_public_url(key=key)
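Note: the hand-rolled public URL construction (yarl URL joining plus HMAC signing via create_hmac_signature and url_signing_secret_key) moves behind the apify-client call get_record_public_url. A minimal usage sketch, assuming the SDK-level key-value store delegates to this client method:

from apify import Actor

async def main() -> None:
    async with Actor:
        kvs = await Actor.open_key_value_store()
        await kvs.set_value('results', {'ok': True})
        # Signing with url_signing_secret_key, when present, now happens inside apify-client.
        url = await kvs.get_public_url('results')
        Actor.log.info(f'Record available at {url}')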
apify/storage_clients/_apify/_request_queue_client.py
@@ -5,16 +5,15 @@ from typing import TYPE_CHECKING, Final, Literal

 from typing_extensions import override

-from apify_client import ApifyClientAsync
 from crawlee._utils.crypto import crypto_random_object_id
 from crawlee.storage_clients._base import RequestQueueClient
 from crawlee.storage_clients.models import AddRequestsResponse, ProcessedRequest, RequestQueueMetadata
 from crawlee.storages import RequestQueue

 from ._models import ApifyRequestQueueMetadata, RequestQueueStats
-from ._request_queue_shared_client import _ApifyRequestQueueSharedClient
-from ._request_queue_single_client import _ApifyRequestQueueSingleClient
-from ._utils import AliasResolver
+from ._request_queue_shared_client import ApifyRequestQueueSharedClient
+from ._request_queue_single_client import ApifyRequestQueueSingleClient
+from ._utils import AliasResolver, create_apify_client

 if TYPE_CHECKING:
     from collections.abc import Sequence
@@ -47,14 +46,14 @@ class ApifyRequestQueueClient(RequestQueueClient):
         self._api_client = api_client
         """The Apify request queue client for API operations."""

-        self._implementation: _ApifyRequestQueueSingleClient | _ApifyRequestQueueSharedClient
+        self._implementation: ApifyRequestQueueSingleClient | ApifyRequestQueueSharedClient
         """Internal implementation used to communicate with the Apify platform based Request Queue."""
         if access == 'single':
-            self._implementation = _ApifyRequestQueueSingleClient(
+            self._implementation = ApifyRequestQueueSingleClient(
                 api_client=self._api_client, metadata=metadata, cache_size=self._MAX_CACHED_REQUESTS
             )
         elif access == 'shared':
-            self._implementation = _ApifyRequestQueueSharedClient(
+            self._implementation = ApifyRequestQueueSharedClient(
                 api_client=self._api_client,
                 metadata=metadata,
                 cache_size=self._MAX_CACHED_REQUESTS,
@@ -228,29 +227,7 @@ class ApifyRequestQueueClient(RequestQueueClient):
         if sum(1 for param in [id, name, alias] if param is not None) > 1:
             raise ValueError('Only one of "id", "name", or "alias" can be specified, not multiple.')

-
-        if not token:
-            raise ValueError(f'Apify storage client requires a valid token in Configuration (token={token}).')
-
-        api_url = configuration.api_base_url
-        if not api_url:
-            raise ValueError(f'Apify storage client requires a valid API URL in Configuration (api_url={api_url}).')
-
-        api_public_base_url = configuration.api_public_base_url
-        if not api_public_base_url:
-            raise ValueError(
-                'Apify storage client requires a valid API public base URL in Configuration '
-                f'(api_public_base_url={api_public_base_url}).'
-            )
-
-        # Create Apify client with the provided token and API URL.
-        apify_client_async = ApifyClientAsync(
-            token=token,
-            api_url=api_url,
-            max_retries=8,
-            min_delay_between_retries_millis=500,
-            timeout_secs=360,
-        )
+        apify_client_async = create_apify_client(configuration)
         apify_rqs_client = apify_client_async.request_queues()

         # Normalize unnamed default storage in cases where not defined in `configuration.default_request_queue_id` to
apify/storage_clients/_apify/_request_queue_shared_client.py
@@ -23,7 +23,7 @@ if TYPE_CHECKING:
 logger = getLogger(__name__)


-class _ApifyRequestQueueSharedClient:
+class ApifyRequestQueueSharedClient:
     """An Apify platform implementation of the request queue client.

     This implementation supports multiple producers and multiple consumers scenario.
@@ -54,10 +54,10 @@ class _ApifyRequestQueueSharedClient:
         """The Apify request queue client for API operations."""

         self._queue_head = deque[str]()
-        """A deque to store request unique keys in the queue head."""
+        """A deque to store request ids in the queue head."""

         self._requests_cache: LRUCache[str, CachedRequest] = LRUCache(maxsize=cache_size)
-        """A cache to store request objects. Request unique key is used as the cache key."""
+        """A cache to store request objects. Request id is used as the cache key."""

         self._queue_has_locked_requests: bool | None = None
         """Whether the queue has requests locked by another client."""
@@ -101,31 +101,30 @@ class _ApifyRequestQueueSharedClient:
         already_present_requests: list[ProcessedRequest] = []

         for request in requests:
-            if self._requests_cache.get(request.unique_key):
+            request_id = unique_key_to_request_id(request.unique_key)
+            if self._requests_cache.get(request_id):
                 # We are not sure if it was already handled at this point, and it is not worth calling API for it.
                 # It could have been handled by another client in the meantime, so cached information about
                 # `request.was_already_handled` is not reliable.
                 already_present_requests.append(
-                    ProcessedRequest.model_validate(
-                        {
-                            'uniqueKey': request.unique_key,
-                            'wasAlreadyPresent': True,
-                            'wasAlreadyHandled': request.was_already_handled,
-                        }
+                    ProcessedRequest(
+                        id=request_id,
+                        unique_key=request.unique_key,
+                        was_already_present=True,
+                        was_already_handled=request.was_already_handled,
                     )
                 )

             else:
                 # Add new request to the cache.
-                processed_request = ProcessedRequest.model_validate(
-                    {
-                        'uniqueKey': request.unique_key,
-                        'wasAlreadyPresent': True,
-                        'wasAlreadyHandled': request.was_already_handled,
-                    }
+                processed_request = ProcessedRequest(
+                    id=request_id,
+                    unique_key=request.unique_key,
+                    was_already_present=True,
+                    was_already_handled=request.was_already_handled,
                 )
                 self._cache_request(
-                    request.unique_key,
+                    request_id,
                     processed_request,
                 )
                 new_requests.append(request)
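Note: both request queue clients now key their caches and queue heads by request id instead of unique key, relying on the id being derived deterministically from the unique key (see unique_key_to_request_id in _utils.py near the end of this diff). An illustrative re-implementation of that mapping, assuming sha256 plus URL-safe base64 truncated to 15 characters:

import base64
import hashlib

def unique_key_to_request_id(unique_key: str, *, request_id_length: int = 15) -> str:
    # Hash the unique key, encode it URL-safely, strip padding, then truncate.
    digest = hashlib.sha256(unique_key.encode('utf-8')).digest()
    url_safe_key = base64.urlsafe_b64encode(digest).decode('ascii').rstrip('=')
    return url_safe_key[:request_id_length]

# The mapping is stable, so ids can be computed locally without an API round trip.
assert unique_key_to_request_id('https://example.com') == unique_key_to_request_id('https://example.com')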
@@ -135,7 +134,6 @@ class _ApifyRequestQueueSharedClient:
             requests_dict = [
                 request.model_dump(
                     by_alias=True,
-                    exclude={'id'},  # Exclude ID fields from requests since the API doesn't accept them.
                 )
                 for request in new_requests
             ]
@@ -150,7 +148,8 @@ class _ApifyRequestQueueSharedClient:

             # Remove unprocessed requests from the cache
             for unprocessed_request in api_response.unprocessed_requests:
-                self._requests_cache.pop(unprocessed_request.unique_key, None)
+                unprocessed_request_id = unique_key_to_request_id(unprocessed_request.unique_key)
+                self._requests_cache.pop(unprocessed_request_id, None)

         else:
             api_response = AddRequestsResponse.model_validate(
@@ -183,7 +182,10 @@ class _ApifyRequestQueueSharedClient:
         Returns:
             The request or None if not found.
         """
-        response = await self._api_client.get_request(unique_key_to_request_id(unique_key))
+        return await self._get_request_by_id(unique_key_to_request_id(unique_key))
+
+    async def _get_request_by_id(self, request_id: str) -> Request | None:
+        response = await self._api_client.get_request(request_id)

         if response is None:
             return None
@@ -210,15 +212,15 @@ class _ApifyRequestQueueSharedClient:
             return None

         # Get the next request ID from the queue head
-        next_unique_key = self._queue_head.popleft()
+        next_request_id = self._queue_head.popleft()

-        request = await self._get_or_hydrate_request(next_unique_key)
+        request = await self._get_or_hydrate_request(next_request_id)

         # Handle potential inconsistency where request might not be in the main table yet
         if request is None:
             logger.debug(
                 'Cannot find a request from the beginning of queue, will be retried later',
-                extra={'next_unique_key': next_unique_key},
+                extra={'next_request_id': next_request_id},
             )
             return None
@@ -226,16 +228,16 @@ class _ApifyRequestQueueSharedClient:
         if request.handled_at is not None:
             logger.debug(
                 'Request fetched from the beginning of queue was already handled',
-                extra={'next_unique_key': next_unique_key},
+                extra={'next_request_id': next_request_id},
             )
             return None

         # Use get request to ensure we have the full request object.
-        request = await self.get_request(next_unique_key)
+        request = await self._get_request_by_id(next_request_id)
         if request is None:
             logger.debug(
                 'Request fetched from the beginning of queue was not found in the RQ',
-                extra={'next_unique_key': next_unique_key},
+                extra={'next_request_id': next_request_id},
             )
             return None
@@ -252,15 +254,17 @@ class _ApifyRequestQueueSharedClient:
         Returns:
             Information about the queue operation. `None` if the given request was not in progress.
         """
+        request_id = unique_key_to_request_id(request.unique_key)
         # Set the handled_at timestamp if not already set
         if request.handled_at is None:
             request.handled_at = datetime.now(tz=timezone.utc)

-        if cached_request := self._requests_cache[request.unique_key]:
+        if cached_request := self._requests_cache[request_id]:
             cached_request.was_already_handled = request.was_already_handled
         try:
             # Update the request in the API
             processed_request = await self._update_request(request)
+            processed_request.id = request_id
             processed_request.unique_key = request.unique_key

             # Update assumed handled count if this wasn't already handled
@@ -269,10 +273,9 @@ class _ApifyRequestQueueSharedClient:
                 self.metadata.pending_request_count -= 1

             # Update the cache with the handled request
-            cache_key = request.unique_key
             self._cache_request(
-                cache_key,
-                processed_request,
+                cache_key=request_id,
+                processed_request=processed_request,
                 hydrated_request=request,
             )
         except Exception as exc:
@@ -356,17 +359,17 @@ class _ApifyRequestQueueSharedClient:
         # Fetch requests from the API and populate the queue head
         await self._list_head()

-    async def _get_or_hydrate_request(self, unique_key: str) -> Request | None:
-        """Get a request by unique key, either from cache or by fetching from API.
+    async def _get_or_hydrate_request(self, request_id: str) -> Request | None:
+        """Get a request by id, either from cache or by fetching from API.

         Args:
-            unique_key: Unique key of the request to get.
+            request_id: Id of the request to get.

         Returns:
             The request if found and valid, otherwise None.
         """
         # First check if the request is in our cache
-        cached_entry = self._requests_cache.get(unique_key)
+        cached_entry = self._requests_cache.get(request_id)
|
|
|
375
378
|
# If not in cache or not hydrated, fetch the request
|
|
376
379
|
try:
|
|
377
380
|
# Fetch the request data
|
|
378
|
-
request = await self.
|
|
381
|
+
request = await self._get_request_by_id(request_id)
|
|
379
382
|
|
|
380
383
|
# If request is not found and return None
|
|
381
384
|
if not request:
|
|
382
385
|
return None
|
|
383
386
|
|
|
384
387
|
# Update cache with hydrated request
|
|
385
|
-
cache_key = request.unique_key
|
|
386
388
|
self._cache_request(
|
|
387
|
-
cache_key,
|
|
388
|
-
ProcessedRequest(
|
|
389
|
+
cache_key=request_id,
|
|
390
|
+
processed_request=ProcessedRequest(
|
|
391
|
+
id=request_id,
|
|
389
392
|
unique_key=request.unique_key,
|
|
390
393
|
was_already_present=True,
|
|
391
394
|
was_already_handled=request.handled_at is not None,
|
|
@@ -393,7 +396,7 @@ class _ApifyRequestQueueSharedClient:
                 hydrated_request=request,
             )
         except Exception as exc:
-            logger.debug(f'Error fetching request {unique_key}: {exc!s}')
+            logger.debug(f'Error fetching request {request_id}: {exc!s}')
             return None
         else:
             return request
@@ -442,8 +445,8 @@ class _ApifyRequestQueueSharedClient:
             logger.debug(f'Using cached queue head with {len(self._queue_head)} requests')
             # Create a list of requests from the cached queue head
             items = []
-            for unique_key in list(self._queue_head)[:limit]:
-                cached_request = self._requests_cache.get(unique_key)
+            for request_id in list(self._queue_head)[:limit]:
+                cached_request = self._requests_cache.get(request_id)
                 if cached_request and cached_request.hydrated:
                     items.append(cached_request.hydrated)
@@ -476,32 +479,35 @@ class _ApifyRequestQueueSharedClient:

         for request_data in response.get('items', []):
             request = Request.model_validate(request_data)
+            request_id = request_data.get('id')

             # Skip requests without ID or unique key
-            if not request.unique_key:
+            if not request.unique_key or not request_id:
                 logger.debug(
-                    'Skipping request from queue head, missing unique key',
+                    'Skipping request from queue head, missing unique key or id',
                     extra={
                         'unique_key': request.unique_key,
+                        'id': request_id,
                     },
                 )
                 continue

             # Cache the request
             self._cache_request(
-                request.unique_key,
+                request_id,
                 ProcessedRequest(
+                    id=request_id,
                     unique_key=request.unique_key,
                     was_already_present=True,
                     was_already_handled=False,
                 ),
                 hydrated_request=request,
             )
-            self._queue_head.append(request.unique_key)
+            self._queue_head.append(request_id)

-        for leftover_unique_key in leftover_buffer:
+        for leftover_id in leftover_buffer:
             # After adding new requests to the forefront, any existing leftover locked request is kept in the end.
-            self._queue_head.append(leftover_unique_key)
+            self._queue_head.append(leftover_id)
         return RequestQueueHead.model_validate(response)

     def _cache_request(
@@ -520,7 +526,7 @@ class _ApifyRequestQueueSharedClient:
             hydrated_request: The hydrated request object, if available.
         """
         self._requests_cache[cache_key] = CachedRequest(
-            unique_key=processed_request.unique_key,
+            id=processed_request.id,
             was_already_handled=processed_request.was_already_handled,
             hydrated=hydrated_request,
             lock_expires_at=None,
apify/storage_clients/_apify/_request_queue_single_client.py
@@ -21,7 +21,7 @@ if TYPE_CHECKING:
 logger = getLogger(__name__)


-class _ApifyRequestQueueSingleClient:
+class ApifyRequestQueueSingleClient:
     """An Apify platform implementation of the request queue client with limited capability.

     This client is designed to use as little resources as possible, but has to be used in constrained context.
@@ -56,21 +56,21 @@ class _ApifyRequestQueueSingleClient:
         """The Apify request queue client for API operations."""

         self._requests_cache: LRUCache[str, Request] = LRUCache(maxsize=cache_size)
-        """A cache to store request objects. Request unique key is used as the cache key."""
+        """A cache to store request objects. Request id is used as the cache key."""

         self._head_requests: deque[str] = deque()
-        """Ordered unique keys of requests that represent queue head."""
+        """Ordered ids of requests that represent queue head."""

         self._requests_already_handled: set[str] = set()
         """Local estimation of requests unique keys that are already present and handled on the platform.

         - To enhance local deduplication.
         - To reduce the _requests_cache size. Already handled requests are most likely not going to be needed again,
-          so no need to cache more than their unique key.
+          so no need to cache more than their id.
         """

         self._requests_in_progress: set[str] = set()
-        """Set of requests unique keys that are being processed locally.
+        """Set of requests ids that are being processed locally.

         - To help decide if the RQ is finished or not. This is the only consumer, so it can be tracked locally.
         """
@@ -105,26 +105,27 @@ class _ApifyRequestQueueSingleClient:
         already_present_requests: list[ProcessedRequest] = []

         for request in requests:
+            # Calculate id for request
+            request_id = unique_key_to_request_id(request.unique_key)
+
             # Check if request is known to be already handled (it has to be present as well.)
-            if request.unique_key in self._requests_already_handled:
+            if request_id in self._requests_already_handled:
                 already_present_requests.append(
-                    ProcessedRequest.model_validate(
-                        {
-                            'uniqueKey': request.unique_key,
-                            'wasAlreadyPresent': True,
-                            'wasAlreadyHandled': True,
-                        }
+                    ProcessedRequest(
+                        id=request_id,
+                        unique_key=request.unique_key,
+                        was_already_present=True,
+                        was_already_handled=True,
                     )
                 )
             # Check if request is known to be already present, but unhandled
-            elif self._requests_cache.get(request.unique_key):
+            elif self._requests_cache.get(request_id):
                 already_present_requests.append(
-                    ProcessedRequest.model_validate(
-                        {
-                            'uniqueKey': request.unique_key,
-                            'wasAlreadyPresent': True,
-                            'wasAlreadyHandled': request.was_already_handled,
-                        }
+                    ProcessedRequest(
+                        id=request_id,
+                        unique_key=request.unique_key,
+                        was_already_present=True,
+                        was_already_handled=request.was_already_handled,
                     )
                 )
             else:
@@ -132,11 +133,11 @@ class _ApifyRequestQueueSingleClient:
                 new_requests.append(request)

                 # Update local caches
-                self._requests_cache[request.unique_key] = request
+                self._requests_cache[request_id] = request
                 if forefront:
-                    self._head_requests.append(request.unique_key)
+                    self._head_requests.append(request_id)
                 else:
-                    self._head_requests.appendleft(request.unique_key)
+                    self._head_requests.appendleft(request_id)

         if new_requests:
             # Prepare requests for API by converting to dictionaries.
@@ -155,11 +156,12 @@ class _ApifyRequestQueueSingleClient:
             api_response.processed_requests.extend(already_present_requests)
             # Remove unprocessed requests from the cache
             for unprocessed_request in api_response.unprocessed_requests:
-                self._requests_cache.pop(unprocessed_request.unique_key, None)
+                self._requests_cache.pop(unique_key_to_request_id(unprocessed_request.unique_key), None)

         else:
-            api_response = AddRequestsResponse.model_validate(
-                {'unprocessedRequests': [], 'processedRequests': already_present_requests}
+            api_response = AddRequestsResponse(
+                unprocessed_requests=[],
+                processed_requests=already_present_requests,
             )
@@ -181,15 +183,39 @@ class _ApifyRequestQueueSingleClient:
         Returns:
             The request or None if not found.
         """
-        request_id = unique_key_to_request_id(unique_key)
+        return await self._get_request(id=unique_key_to_request_id(unique_key))
+
+    async def _get_request(self, id: str) -> Request | None:
+        """Get a request by id.
+
+        Args:
+            id: Id of request to get.
+
+        Returns:
+            The request or None if not found.
+        """
+        if id in self._requests_cache:
+            return self._requests_cache[id]

-        response = await self._api_client.get_request(request_id)
+        # Requests that were not added by this client are not in local cache. Fetch them from platform.
+        response = await self._api_client.get_request(id)

         if response is None:
             return None

-        return Request.model_validate(response)
+        request = Request.model_validate(response)
+
+        # Updated local caches
+        if id in self._requests_in_progress:
+            # No caching of requests that are already in progress, client is already aware of them.
+            pass
+        elif request.was_already_handled:
+            # Cache only id for already handled requests
+            self._requests_already_handled.add(id)
+        else:
+            # Cache full request for unhandled requests that are not yet in progress and are not yet handled.
+            self._requests_cache[id] = request
+        return request

     async def fetch_next_request(self) -> Request | None:
         """Return the next request in the queue to be processed.
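Note: _get_request now consults local state before hitting the API and sorts fetched requests into three tiers. The decision it applies, as a standalone sketch (container shapes assumed; crawlee's Request model provides was_already_handled):

from typing import Any

def cache_fetched_request(
    request_id: str,
    request: Any,
    in_progress: set[str],
    already_handled: set[str],
    cache: dict[str, Any],
) -> None:
    if request_id in in_progress:
        return  # the client already holds this request; caching would be redundant
    if request.was_already_handled:
        already_handled.add(request_id)  # the id alone suffices for deduplication
    else:
        cache[request_id] = request  # the full object may be needed for processing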
@@ -205,13 +231,10 @@ class _ApifyRequestQueueSingleClient:
         await self._ensure_head_is_non_empty()

         while self._head_requests:
-            request_unique_key = self._head_requests.pop()
-            if (
-                request_unique_key not in self._requests_in_progress
-                and request_unique_key not in self._requests_already_handled
-            ):
-                self._requests_in_progress.add(request_unique_key)
-                return await self.get_request(request_unique_key)
+            request_id = self._head_requests.pop()
+            if request_id not in self._requests_in_progress and request_id not in self._requests_already_handled:
+                self._requests_in_progress.add(request_id)
+                return await self._get_request(request_id)
         # No request locally and the ones returned from the platform are already in progress.
         return None
@@ -237,30 +260,18 @@ class _ApifyRequestQueueSingleClient:
         # Update the cached data
         for request_data in response.get('items', []):
             request = Request.model_validate(request_data)
+            request_id = request_data['id']

-            if request.unique_key in self._requests_in_progress:
+            if request_id in self._requests_in_progress:
                 # Ignore requests that are already in progress, we will not process them again.
                 continue
+
             if request.was_already_handled:
-                # Do not cache fully handled requests, we do not need them. Just cache their unique key.
-                self._requests_already_handled.add(request.unique_key)
-            else:
-                # Cache full request, fetching the complete data from the platform as the
-                # head response does not contain it.
-                request_id = unique_key_to_request_id(request.unique_key)
-                complete_request_data = await self._api_client.get_request(request_id)
-
-                if complete_request_data is not None:
-                    request = Request.model_validate(complete_request_data)
-                    self._requests_cache[request.unique_key] = request
-                else:
-                    logger.warning(
-                        f'Could not fetch request data for unique_key=`{request.unique_key}` (id=`{request_id}`)'
-                    )
-
-            # Add new requests to the end of the head, unless already present in head
-            if request.unique_key not in self._head_requests:
-                self._head_requests.appendleft(request.unique_key)
+                # Do not cache fully handled requests, we do not need them. Just cache their id.
+                self._requests_already_handled.add(request_id)
+            # Add new requests to the end of the head, unless already present in head
+            elif request_id not in self._head_requests:
+                self._head_requests.appendleft(request_id)
@@ -275,12 +286,14 @@ class _ApifyRequestQueueSingleClient:
         """
         # Set the handled_at timestamp if not already set

+        request_id = unique_key_to_request_id(request.unique_key)
+
         if request.handled_at is None:
             request.handled_at = datetime.now(tz=timezone.utc)
             self.metadata.handled_request_count += 1
             self.metadata.pending_request_count -= 1

-        if cached_request := self._requests_cache.get(request.unique_key):
+        if cached_request := self._requests_cache.get(request_id):
             cached_request.handled_at = request.handled_at

         try:
@@ -289,10 +302,10 @@ class _ApifyRequestQueueSingleClient:
             # adding to the queue.)
             processed_request = await self._update_request(request)
             # Remember that we handled this request, to optimize local deduplication.
-            self._requests_already_handled.add(request.unique_key)
+            self._requests_already_handled.add(request_id)
             # Remove request from cache. It will most likely not be needed.
-            self._requests_cache.pop(request.unique_key)
-            self._requests_in_progress.discard(request.unique_key)
+            self._requests_cache.pop(request_id)
+            self._requests_in_progress.discard(request_id)

         except Exception as exc:
             logger.debug(f'Error marking request {request.unique_key} as handled: {exc!s}')
@@ -319,23 +332,27 @@ class _ApifyRequestQueueSingleClient:
         """
         # Check if the request was marked as handled and clear it. When reclaiming,
         # we want to put the request back for processing.
+
+        request_id = unique_key_to_request_id(request.unique_key)
+
         if request.was_already_handled:
             request.handled_at = None

         try:
             # Make sure request is in the local cache. We might need it.
-            self._requests_cache[request.unique_key] = request
+            self._requests_cache[request_id] = request

             # No longer in progress
-            self._requests_in_progress.discard(request.unique_key)
+            self._requests_in_progress.discard(request_id)
             # No longer handled
-            self._requests_already_handled.discard(request.unique_key)
+            self._requests_already_handled.discard(request_id)

             if forefront:
                 # Append to top of the local head estimation
-                self._head_requests.append(request.unique_key)
+                self._head_requests.append(request_id)

             processed_request = await self._update_request(request, forefront=forefront)
+            processed_request.id = request_id
             processed_request.unique_key = request.unique_key
             # If the request was previously handled, decrement our handled count since
             # we're putting it back for processing.
@@ -397,9 +414,11 @@ class _ApifyRequestQueueSingleClient:
         response = await self._api_client.list_requests(limit=10_000)
         for request_data in response.get('items', []):
             request = Request.model_validate(request_data)
+            request_id = request_data['id']
+
             if request.was_already_handled:
-                # Cache just unique key for deduplication
-                self._requests_already_handled.add(request.unique_key)
+                # Cache just id for deduplication
+                self._requests_already_handled.add(request_id)
             else:
                 # Cache full request
-                self._requests_cache[request.unique_key] = request
+                self._requests_cache[request_id] = request
apify/storage_clients/_apify/_utils.py
@@ -192,3 +192,30 @@ def unique_key_to_request_id(unique_key: str, *, request_id_length: int = 15) -> str:

     # Truncate the key to the desired length
     return url_safe_key[:request_id_length]
+
+
+def create_apify_client(configuration: Configuration) -> ApifyClientAsync:
+    """Create and return an ApifyClientAsync instance using the provided configuration."""
+    if not configuration.token:
+        raise ValueError(f'Apify storage client requires a valid token in Configuration (token={configuration.token}).')
+
+    api_url = configuration.api_base_url
+    if not api_url:
+        raise ValueError(f'Apify storage client requires a valid API URL in Configuration (api_url={api_url}).')
+
+    api_public_base_url = configuration.api_public_base_url
+    if not api_public_base_url:
+        raise ValueError(
+            'Apify storage client requires a valid API public base URL in Configuration '
+            f'(api_public_base_url={api_public_base_url}).'
+        )
+
+    # Create Apify client with the provided token and API URL.
+    return ApifyClientAsync(
+        token=configuration.token,
+        api_url=api_url,
+        api_public_url=api_public_base_url,
+        max_retries=8,
+        min_delay_between_retries_millis=500,
+        timeout_secs=360,
+    )
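Note: the token and API URL validation plus ApifyClientAsync construction that was previously duplicated in the dataset, key-value store, and request queue clients is consolidated here; the new api_public_url argument presumes apify-client >= 2.2.0, matching the dependency bump in METADATA below. A minimal sketch of calling the helper (a private utility, so the import path may change):

from apify import Configuration
from apify.storage_clients._apify._utils import create_apify_client

config = Configuration.get_global_configuration()
# Raises ValueError when token, api_base_url, or api_public_base_url is missing.
client = create_apify_client(config)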
{apify-3.0.2b6.dist-info → apify-3.0.3.dist-info}/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: apify
-Version: 3.0.2b6
+Version: 3.0.3
 Summary: Apify SDK for Python
 Project-URL: Apify Homepage, https://apify.com
 Project-URL: Changelog, https://docs.apify.com/sdk/python/docs/changelog
@@ -225,7 +225,7 @@ Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
 Classifier: Topic :: Software Development :: Libraries
 Requires-Python: >=3.10
-Requires-Dist: apify-client<3.0.0,>=2.
+Requires-Dist: apify-client<3.0.0,>=2.2.0
 Requires-Dist: apify-shared<3.0.0,>=2.0.0
 Requires-Dist: cachetools>=5.5.0
 Requires-Dist: crawlee<2.0.0,>=1.0.2
{apify-3.0.2b6.dist-info → apify-3.0.3.dist-info}/RECORD
@@ -14,7 +14,7 @@ apify/events/_apify_event_manager.py,sha256=yArFrKa4wWDZo32iwaA3F_w36VSJf1Yaj_L1
 apify/events/_types.py,sha256=F0BHgACqnRfmdQ9GUcpnZvPxzw2bdRr8BqbGSA4cHeQ,3050
 apify/events/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 apify/request_loaders/__init__.py,sha256=SJqW0FbdZKEtAMB5kBLgqROzh3KmQc2CNEIhJpTGdPQ,356
-apify/request_loaders/_apify_request_list.py,sha256=
+apify/request_loaders/_apify_request_list.py,sha256=jbZTHK3ACbh4YauYVJgXHXxB3rPcots5JMcr3GdIMz8,6210
 apify/request_loaders/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 apify/scrapy/__init__.py,sha256=m2a0ts_JY9xJkBy4JU5mV8PJqjA3GGKLXBFu4nl-n-A,1048
 apify/scrapy/_actor_runner.py,sha256=rXWSnlQWGskDUH8PtLCv5SkOIx4AiVa4QbCYeCett5c,938
@@ -35,14 +35,14 @@ apify/scrapy/pipelines/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSu
 apify/storage_clients/__init__.py,sha256=JheTvNpVD_luQXC1KTEgtr6yVnuMEC9ajBNLCX3HuSo,358
 apify/storage_clients/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 apify/storage_clients/_apify/__init__.py,sha256=mtbVDsxqWL3kx30elnh0kAn2kZ4s3BBsWa15Y5e7RMU,347
-apify/storage_clients/_apify/_dataset_client.py,sha256=
-apify/storage_clients/_apify/_key_value_store_client.py,sha256=
-apify/storage_clients/_apify/_models.py,sha256=
-apify/storage_clients/_apify/_request_queue_client.py,sha256=
-apify/storage_clients/_apify/_request_queue_shared_client.py,sha256=
-apify/storage_clients/_apify/_request_queue_single_client.py,sha256=
+apify/storage_clients/_apify/_dataset_client.py,sha256=qmCJyL1MN83tYRXmc31P6yMIXVZMyRrGjr7R6-86FSE,11869
+apify/storage_clients/_apify/_key_value_store_client.py,sha256=994a5bM_BGHIeirnny6QlXjy5CzMU2I9SmMksCbHCUY,9357
+apify/storage_clients/_apify/_models.py,sha256=XxBru5XFdj0jqX6V-uVahT-pMQU3pZ501aTNzXCuoMU,4556
+apify/storage_clients/_apify/_request_queue_client.py,sha256=tAyap34gpxvPiQ0McDjX5ojq1ZIZc4EI3PrW8VQqS4k,13292
+apify/storage_clients/_apify/_request_queue_shared_client.py,sha256=pWmd6aPxM-eZ6PC1MfsfTcjD2mGGpCDS3ZZ3cG_2MEA,20971
+apify/storage_clients/_apify/_request_queue_single_client.py,sha256=d2txMwxW1nlYnvjdOH8xpxhcOYNeyc1ousGHRE7jsPg,17468
 apify/storage_clients/_apify/_storage_client.py,sha256=hFl_PuX1UgOydBD6pieZ0u2NWbDmZV-i0qygKdsuHt4,4873
-apify/storage_clients/_apify/_utils.py,sha256=
+apify/storage_clients/_apify/_utils.py,sha256=375gk_TJyMWIIgRbE9SS0hQup0h6sA3mzpTG53XIjkM,8769
 apify/storage_clients/_apify/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 apify/storage_clients/_file_system/__init__.py,sha256=rDbXatXV9wHKPhKTrXDzWnexhTm7sIJQWucMi-P-SD4,130
 apify/storage_clients/_file_system/_key_value_store_client.py,sha256=gxM3ap67PnY80Rd7P3onPAf2pksYpU0LoAlJdayEMdU,4179
@@ -51,7 +51,7 @@ apify/storage_clients/_smart_apify/__init__.py,sha256=614B2AaWY-dx6RQ6mod7VVR8gF
 apify/storage_clients/_smart_apify/_storage_client.py,sha256=GCPmVe_xWAFcO2Cuej4su4i97_d33Q9Ih_Sc5xW2Wa4,4674
 apify/storages/__init__.py,sha256=-9tEYJVabVs_eRVhUehxN58GH0UG8OfuGjGwuDieP2M,122
 apify/storages/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-apify-3.0.2b6.dist-info/METADATA,sha256=
-apify-3.0.2b6.dist-info/WHEEL,sha256=
-apify-3.0.2b6.dist-info/licenses/LICENSE,sha256=
-apify-3.0.2b6.dist-info/RECORD,,
+apify-3.0.3.dist-info/METADATA,sha256=BghvJ18oGZT-KqyoV3wlC6WdJ-rVupfzSUaWXLix2-k,22580
+apify-3.0.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+apify-3.0.3.dist-info/licenses/LICENSE,sha256=AsFjHssKjj4LGd2ZCqXn6FBzMqcWdjQre1byPPSypVw,11355
+apify-3.0.3.dist-info/RECORD,,
{apify-3.0.2b6.dist-info → apify-3.0.3.dist-info}/WHEEL: file without changes
{apify-3.0.2b6.dist-info → apify-3.0.3.dist-info}/licenses/LICENSE: file without changes