apify 3.0.1b1__py3-none-any.whl → 3.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of apify might be problematic. Click here for more details.
- apify/_actor.py +211 -152
- apify/_configuration.py +9 -0
- apify/scrapy/middlewares/apify_proxy.py +1 -1
- apify/storage_clients/_apify/_request_queue_client.py +5 -5
- apify/storage_clients/_apify/_request_queue_shared_client.py +9 -13
- apify/storage_clients/_apify/_request_queue_single_client.py +34 -18
- apify/storage_clients/_file_system/_key_value_store_client.py +66 -20
- {apify-3.0.1b1.dist-info → apify-3.0.2.dist-info}/METADATA +2 -2
- {apify-3.0.1b1.dist-info → apify-3.0.2.dist-info}/RECORD +11 -11
- {apify-3.0.1b1.dist-info → apify-3.0.2.dist-info}/WHEEL +0 -0
- {apify-3.0.1b1.dist-info → apify-3.0.2.dist-info}/licenses/LICENSE +0 -0
apify/_actor.py
CHANGED
|
@@ -32,7 +32,7 @@ from apify._consts import EVENT_LISTENERS_TIMEOUT
|
|
|
32
32
|
from apify._crypto import decrypt_input_secrets, load_private_key
|
|
33
33
|
from apify._models import ActorRun
|
|
34
34
|
from apify._proxy_configuration import ProxyConfiguration
|
|
35
|
-
from apify._utils import docs_group, docs_name, get_system_info, is_running_in_ipython
|
|
35
|
+
from apify._utils import docs_group, docs_name, get_system_info, is_running_in_ipython
|
|
36
36
|
from apify.events import ApifyEventManager, EventManager, LocalEventManager
|
|
37
37
|
from apify.log import _configure_logging, logger
|
|
38
38
|
from apify.storage_clients import ApifyStorageClient
|
|
@@ -48,10 +48,10 @@ if TYPE_CHECKING:
|
|
|
48
48
|
from typing_extensions import Self
|
|
49
49
|
|
|
50
50
|
from crawlee.proxy_configuration import _NewUrlFunction
|
|
51
|
+
from crawlee.storage_clients import StorageClient
|
|
51
52
|
|
|
52
53
|
from apify._models import Webhook
|
|
53
54
|
|
|
54
|
-
|
|
55
55
|
MainReturnType = TypeVar('MainReturnType')
|
|
56
56
|
|
|
57
57
|
|
|
@@ -98,7 +98,10 @@ class _ActorType:
|
|
|
98
98
|
"""
|
|
99
99
|
|
|
100
100
|
_is_rebooting = False
|
|
101
|
+
"""Whether the Actor is currently rebooting."""
|
|
102
|
+
|
|
101
103
|
_is_any_instance_initialized = False
|
|
104
|
+
"""Whether any Actor instance was initialized."""
|
|
102
105
|
|
|
103
106
|
def __init__(
|
|
104
107
|
self,
|
|
@@ -106,63 +109,149 @@ class _ActorType:
|
|
|
106
109
|
*,
|
|
107
110
|
configure_logging: bool = True,
|
|
108
111
|
exit_process: bool | None = None,
|
|
112
|
+
exit_code: int = 0,
|
|
113
|
+
status_message: str | None = None,
|
|
114
|
+
event_listeners_timeout: timedelta | None = EVENT_LISTENERS_TIMEOUT,
|
|
115
|
+
cleanup_timeout: timedelta = timedelta(seconds=30),
|
|
109
116
|
) -> None:
|
|
110
|
-
"""
|
|
111
|
-
|
|
112
|
-
Note that you don't have to do this, all the functionality is accessible using the default instance
|
|
113
|
-
(e.g. `Actor.open_dataset()`).
|
|
117
|
+
"""Initialize a new instance.
|
|
114
118
|
|
|
115
119
|
Args:
|
|
116
|
-
configuration: The Actor configuration to
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
120
|
+
configuration: The Actor configuration to use. If not provided, a default configuration is created.
|
|
121
|
+
configure_logging: Whether to set up the default logging configuration.
|
|
122
|
+
exit_process: Whether the Actor should call `sys.exit` when the context manager exits.
|
|
123
|
+
Defaults to True, except in IPython, Pytest, and Scrapy environments.
|
|
124
|
+
exit_code: The exit code the Actor should use when exiting.
|
|
125
|
+
status_message: Final status message to display upon Actor termination.
|
|
126
|
+
event_listeners_timeout: Maximum time to wait for Actor event listeners to complete before exiting.
|
|
127
|
+
cleanup_timeout: Maximum time to wait for cleanup tasks to finish.
|
|
121
128
|
"""
|
|
129
|
+
self._configuration = configuration
|
|
130
|
+
self._configure_logging = configure_logging
|
|
122
131
|
self._exit_process = self._get_default_exit_process() if exit_process is None else exit_process
|
|
123
|
-
self.
|
|
132
|
+
self._exit_code = exit_code
|
|
133
|
+
self._status_message = status_message
|
|
134
|
+
self._event_listeners_timeout = event_listeners_timeout
|
|
135
|
+
self._cleanup_timeout = cleanup_timeout
|
|
124
136
|
|
|
125
137
|
# Actor state when this method is being executed is unpredictable.
|
|
126
138
|
# Actor can be initialized by lazy object proxy or by user directly, or by both.
|
|
127
139
|
# Until `init` method is run, this state of uncertainty remains. This is the reason why any setting done here in
|
|
128
140
|
# `__init__` method should not be considered final.
|
|
129
141
|
|
|
130
|
-
self._configuration = configuration
|
|
131
|
-
self._configure_logging = configure_logging
|
|
132
142
|
self._apify_client: ApifyClientAsync | None = None
|
|
143
|
+
self._local_storage_client: StorageClient | None = None
|
|
133
144
|
|
|
145
|
+
self._is_exiting = False
|
|
134
146
|
self._is_initialized = False
|
|
135
147
|
|
|
136
148
|
async def __aenter__(self) -> Self:
|
|
137
|
-
"""
|
|
149
|
+
"""Enter the Actor context.
|
|
150
|
+
|
|
151
|
+
Initializes the Actor when used in an `async with` block. This method:
|
|
138
152
|
|
|
139
|
-
|
|
153
|
+
- Sets up local or cloud storage clients depending on whether the Actor runs locally or on the Apify platform.
|
|
154
|
+
- Configures the event manager and starts periodic state persistence.
|
|
155
|
+
- Initializes the charging manager for handling charging events.
|
|
156
|
+
- Configures logging after all core services are registered.
|
|
140
157
|
|
|
141
|
-
|
|
142
|
-
|
|
158
|
+
This method must be called exactly once per Actor instance. Re-initializing an Actor or having multiple
|
|
159
|
+
active Actor instances is not standard usage and may lead to warnings or unexpected behavior.
|
|
143
160
|
"""
|
|
144
|
-
|
|
161
|
+
if self._is_initialized:
|
|
162
|
+
raise RuntimeError('The Actor was already initialized!')
|
|
163
|
+
|
|
164
|
+
# Initialize configuration first - it's required for the next steps.
|
|
165
|
+
if self._configuration:
|
|
166
|
+
# User provided explicit configuration - register it in the service locator.
|
|
167
|
+
service_locator.set_configuration(self.configuration)
|
|
168
|
+
else:
|
|
169
|
+
# No explicit configuration provided - trigger creation of default configuration.
|
|
170
|
+
_ = self.configuration
|
|
171
|
+
|
|
172
|
+
# Configure logging based on the configuration, any logs before this point are lost.
|
|
173
|
+
if self._configure_logging:
|
|
174
|
+
_configure_logging()
|
|
175
|
+
self.log.debug('Logging configured')
|
|
176
|
+
|
|
177
|
+
self.log.info('Initializing Actor', extra=get_system_info())
|
|
178
|
+
self.log.debug('Configuration initialized')
|
|
179
|
+
|
|
180
|
+
# Warn about non-standard usage patterns.
|
|
181
|
+
if _ActorType._is_any_instance_initialized:
|
|
182
|
+
self.log.warning('Repeated Actor initialization detected - this is non-standard usage, proceed with care.')
|
|
183
|
+
|
|
184
|
+
# Update the global Actor proxy to refer to this instance.
|
|
185
|
+
cast('Proxy', Actor).__wrapped__ = self
|
|
186
|
+
self._is_exiting = False
|
|
187
|
+
self._was_final_persist_state_emitted = False
|
|
188
|
+
|
|
189
|
+
# Initialize the storage client and register it in the service locator.
|
|
190
|
+
_ = self._storage_client
|
|
191
|
+
self.log.debug('Storage client initialized')
|
|
192
|
+
|
|
193
|
+
# Initialize the event manager and register it in the service locator.
|
|
194
|
+
await self.event_manager.__aenter__()
|
|
195
|
+
self.log.debug('Event manager initialized')
|
|
196
|
+
|
|
197
|
+
# Initialize the charging manager.
|
|
198
|
+
await self._charging_manager_implementation.__aenter__()
|
|
199
|
+
self.log.debug('Charging manager initialized')
|
|
200
|
+
|
|
201
|
+
# Mark initialization as complete and update global state.
|
|
202
|
+
self._is_initialized = True
|
|
203
|
+
_ActorType._is_any_instance_initialized = True
|
|
145
204
|
return self
|
|
146
205
|
|
|
147
206
|
async def __aexit__(
|
|
148
207
|
self,
|
|
149
|
-
|
|
208
|
+
exc_type: type[BaseException] | None,
|
|
150
209
|
exc_value: BaseException | None,
|
|
151
|
-
|
|
210
|
+
exc_traceback: TracebackType | None,
|
|
152
211
|
) -> None:
|
|
153
|
-
"""Exit the Actor
|
|
212
|
+
"""Exit the Actor context.
|
|
213
|
+
|
|
214
|
+
If the block exits with an exception, the Actor fails with a non-zero exit code.
|
|
215
|
+
Otherwise, it exits cleanly. In both cases the Actor:
|
|
154
216
|
|
|
155
|
-
|
|
156
|
-
|
|
217
|
+
- Cancels periodic `PERSIST_STATE` events.
|
|
218
|
+
- Sends a final `PERSIST_STATE` event.
|
|
219
|
+
- Waits for all event listeners to finish.
|
|
220
|
+
- Stops the event manager and the charging manager.
|
|
221
|
+
- Optionally terminates the process with the selected exit code.
|
|
157
222
|
"""
|
|
158
|
-
if
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
223
|
+
if self._is_exiting:
|
|
224
|
+
return
|
|
225
|
+
|
|
226
|
+
self._raise_if_not_initialized()
|
|
227
|
+
|
|
228
|
+
if exc_value and not is_running_in_ipython():
|
|
229
|
+
# In IPython, we don't run `sys.exit()` during Actor exits,
|
|
230
|
+
# so the exception traceback will be printed on its own
|
|
231
|
+
self.log.exception('Actor failed with an exception', exc_info=exc_value)
|
|
232
|
+
self.exit_code = ActorExitCodes.ERROR_USER_FUNCTION_THREW.value
|
|
233
|
+
|
|
234
|
+
self._is_exiting = True
|
|
235
|
+
self.log.info('Exiting Actor', extra={'exit_code': self.exit_code})
|
|
236
|
+
|
|
237
|
+
async def finalize() -> None:
|
|
238
|
+
if self.status_message is not None:
|
|
239
|
+
await self.set_status_message(self.status_message, is_terminal=True)
|
|
240
|
+
|
|
241
|
+
# Sleep for a bit so that the listeners have a chance to trigger
|
|
242
|
+
await asyncio.sleep(0.1)
|
|
243
|
+
|
|
244
|
+
if self._event_listeners_timeout:
|
|
245
|
+
await self.event_manager.wait_for_all_listeners_to_complete(timeout=self._event_listeners_timeout)
|
|
246
|
+
|
|
247
|
+
await self.event_manager.__aexit__(None, None, None)
|
|
248
|
+
await self._charging_manager_implementation.__aexit__(None, None, None)
|
|
249
|
+
|
|
250
|
+
await asyncio.wait_for(finalize(), self._cleanup_timeout.total_seconds())
|
|
251
|
+
self._is_initialized = False
|
|
252
|
+
|
|
253
|
+
if self._exit_process:
|
|
254
|
+
sys.exit(self.exit_code)
|
|
166
255
|
|
|
167
256
|
def __repr__(self) -> str:
|
|
168
257
|
if self is cast('Proxy', Actor).__wrapped__:
|
|
@@ -176,24 +265,58 @@ class _ActorType:
|
|
|
176
265
|
*,
|
|
177
266
|
configure_logging: bool = True,
|
|
178
267
|
exit_process: bool | None = None,
|
|
268
|
+
exit_code: int = 0,
|
|
269
|
+
event_listeners_timeout: timedelta | None = EVENT_LISTENERS_TIMEOUT,
|
|
270
|
+
status_message: str | None = None,
|
|
271
|
+
cleanup_timeout: timedelta = timedelta(seconds=30),
|
|
179
272
|
) -> Self:
|
|
180
|
-
"""Make a new Actor instance with a non-default configuration.
|
|
273
|
+
"""Make a new Actor instance with a non-default configuration.
|
|
274
|
+
|
|
275
|
+
This is necessary due to the lazy object proxying of the global `Actor` instance.
|
|
276
|
+
"""
|
|
181
277
|
return self.__class__(
|
|
182
278
|
configuration=configuration,
|
|
183
279
|
configure_logging=configure_logging,
|
|
184
280
|
exit_process=exit_process,
|
|
281
|
+
exit_code=exit_code,
|
|
282
|
+
event_listeners_timeout=event_listeners_timeout,
|
|
283
|
+
status_message=status_message,
|
|
284
|
+
cleanup_timeout=cleanup_timeout,
|
|
185
285
|
)
|
|
186
286
|
|
|
287
|
+
@property
|
|
288
|
+
def log(self) -> logging.Logger:
|
|
289
|
+
"""Logger configured for this Actor."""
|
|
290
|
+
return logger
|
|
291
|
+
|
|
292
|
+
@property
|
|
293
|
+
def exit_code(self) -> int:
|
|
294
|
+
"""The exit code the Actor will use when exiting."""
|
|
295
|
+
return self._exit_code
|
|
296
|
+
|
|
297
|
+
@exit_code.setter
|
|
298
|
+
def exit_code(self, value: int) -> None:
|
|
299
|
+
self._exit_code = value
|
|
300
|
+
|
|
301
|
+
@property
|
|
302
|
+
def status_message(self) -> str | None:
|
|
303
|
+
"""The final status message that the Actor will display upon termination."""
|
|
304
|
+
return self._status_message
|
|
305
|
+
|
|
306
|
+
@status_message.setter
|
|
307
|
+
def status_message(self, value: str | None) -> None:
|
|
308
|
+
self._status_message = value
|
|
309
|
+
|
|
187
310
|
@property
|
|
188
311
|
def apify_client(self) -> ApifyClientAsync:
|
|
189
|
-
"""
|
|
312
|
+
"""Asynchronous Apify client for interacting with the Apify API."""
|
|
190
313
|
if not self._apify_client:
|
|
191
314
|
self._apify_client = self.new_client()
|
|
192
315
|
return self._apify_client
|
|
193
316
|
|
|
194
317
|
@cached_property
|
|
195
318
|
def configuration(self) -> Configuration:
|
|
196
|
-
"""
|
|
319
|
+
"""Actor configuration, uses the default instance if not explicitly set."""
|
|
197
320
|
if self._configuration:
|
|
198
321
|
return self._configuration
|
|
199
322
|
|
|
@@ -214,8 +337,11 @@ class _ActorType:
|
|
|
214
337
|
|
|
215
338
|
@cached_property
|
|
216
339
|
def event_manager(self) -> EventManager:
|
|
217
|
-
"""
|
|
218
|
-
|
|
340
|
+
"""Manages Apify platform events.
|
|
341
|
+
|
|
342
|
+
It uses `ApifyEventManager` on the Apify platform and `LocalEventManager` otherwise.
|
|
343
|
+
"""
|
|
344
|
+
event_manager = (
|
|
219
345
|
ApifyEventManager(
|
|
220
346
|
configuration=self.configuration,
|
|
221
347
|
persist_state_interval=self.configuration.persist_state_interval,
|
|
@@ -226,19 +352,16 @@ class _ActorType:
|
|
|
226
352
|
persist_state_interval=self.configuration.persist_state_interval,
|
|
227
353
|
)
|
|
228
354
|
)
|
|
355
|
+
service_locator.set_event_manager(event_manager)
|
|
356
|
+
return event_manager
|
|
229
357
|
|
|
230
|
-
@
|
|
231
|
-
def
|
|
232
|
-
|
|
233
|
-
return logger
|
|
234
|
-
|
|
235
|
-
def _raise_if_not_initialized(self) -> None:
|
|
236
|
-
if not self._is_initialized:
|
|
237
|
-
raise RuntimeError('The Actor was not initialized!')
|
|
358
|
+
@cached_property
|
|
359
|
+
def _charging_manager_implementation(self) -> ChargingManagerImplementation:
|
|
360
|
+
return ChargingManagerImplementation(self.configuration, self.apify_client)
|
|
238
361
|
|
|
239
362
|
@cached_property
|
|
240
363
|
def _storage_client(self) -> SmartApifyStorageClient:
|
|
241
|
-
"""Storage client used by the
|
|
364
|
+
"""Storage client used by the Actor.
|
|
242
365
|
|
|
243
366
|
Depending on the initialization of the service locator the client can be created in different ways.
|
|
244
367
|
"""
|
|
@@ -250,7 +373,7 @@ class _ActorType:
|
|
|
250
373
|
service_locator.set_storage_client(implicit_storage_client)
|
|
251
374
|
except ServiceConflictError:
|
|
252
375
|
self.log.debug(
|
|
253
|
-
'Storage client in service locator was set explicitly before Actor.init was called.'
|
|
376
|
+
'Storage client in service locator was set explicitly before Actor.init was called. '
|
|
254
377
|
'Using the existing storage client as implicit storage client for the Actor.'
|
|
255
378
|
)
|
|
256
379
|
else:
|
|
@@ -270,100 +393,35 @@ class _ActorType:
|
|
|
270
393
|
)
|
|
271
394
|
|
|
272
395
|
async def init(self) -> None:
|
|
273
|
-
"""Initialize the Actor
|
|
274
|
-
|
|
275
|
-
This initializes the Actor instance. It configures the right storage client based on whether the Actor is
|
|
276
|
-
running locally or on the Apify platform, it initializes the event manager for processing Actor events,
|
|
277
|
-
and starts an interval for regularly sending `PERSIST_STATE` events, so that the Actor can regularly persist
|
|
278
|
-
its state in response to these events.
|
|
396
|
+
"""Initialize the Actor without using context-manager syntax.
|
|
279
397
|
|
|
280
|
-
|
|
281
|
-
called only once.
|
|
398
|
+
Equivalent to `await Actor.__aenter__()`.
|
|
282
399
|
"""
|
|
283
|
-
self.
|
|
284
|
-
if self._configuration:
|
|
285
|
-
# Set explicitly the configuration in the service locator
|
|
286
|
-
service_locator.set_configuration(self.configuration)
|
|
287
|
-
else:
|
|
288
|
-
# Ensure that the configuration (cached property) is set
|
|
289
|
-
_ = self.configuration
|
|
290
|
-
|
|
291
|
-
if self._is_initialized:
|
|
292
|
-
raise RuntimeError('The Actor was already initialized!')
|
|
293
|
-
|
|
294
|
-
if _ActorType._is_any_instance_initialized:
|
|
295
|
-
self.log.warning('Repeated Actor initialization detected - this is non-standard usage, proceed with care')
|
|
296
|
-
|
|
297
|
-
# Make sure that the currently initialized instance is also available through the global `Actor` proxy
|
|
298
|
-
cast('Proxy', Actor).__wrapped__ = self
|
|
299
|
-
|
|
300
|
-
self._is_exiting = False
|
|
301
|
-
self._was_final_persist_state_emitted = False
|
|
302
|
-
|
|
303
|
-
self.log.debug(f'Storage client set to {self._storage_client}')
|
|
304
|
-
|
|
305
|
-
service_locator.set_event_manager(self.event_manager)
|
|
306
|
-
|
|
307
|
-
# The logging configuration has to be called after all service_locator set methods.
|
|
308
|
-
if self._configure_logging:
|
|
309
|
-
_configure_logging()
|
|
310
|
-
|
|
311
|
-
self.log.info('System info', extra=get_system_info())
|
|
312
|
-
|
|
313
|
-
await self.event_manager.__aenter__()
|
|
314
|
-
self.log.debug('Event manager initialized')
|
|
315
|
-
|
|
316
|
-
await self._charging_manager_implementation.__aenter__()
|
|
317
|
-
self.log.debug('Charging manager initialized')
|
|
318
|
-
|
|
319
|
-
self._is_initialized = True
|
|
320
|
-
_ActorType._is_any_instance_initialized = True
|
|
400
|
+
await self.__aenter__()
|
|
321
401
|
|
|
322
402
|
async def exit(
|
|
323
403
|
self,
|
|
324
404
|
*,
|
|
325
405
|
exit_code: int = 0,
|
|
326
|
-
event_listeners_timeout: timedelta | None = EVENT_LISTENERS_TIMEOUT,
|
|
327
406
|
status_message: str | None = None,
|
|
407
|
+
event_listeners_timeout: timedelta | None = EVENT_LISTENERS_TIMEOUT,
|
|
328
408
|
cleanup_timeout: timedelta = timedelta(seconds=30),
|
|
329
409
|
) -> None:
|
|
330
|
-
"""Exit the Actor
|
|
410
|
+
"""Exit the Actor without using context-manager syntax.
|
|
331
411
|
|
|
332
|
-
|
|
333
|
-
sends a final `PERSIST_STATE` event, waits for all the event listeners to finish, and stops the event manager.
|
|
412
|
+
Equivalent to `await Actor.__aexit__()`.
|
|
334
413
|
|
|
335
414
|
Args:
|
|
336
|
-
exit_code: The exit code
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
cleanup_timeout:
|
|
415
|
+
exit_code: The exit code the Actor should use when exiting.
|
|
416
|
+
status_message: Final status message to display upon Actor termination.
|
|
417
|
+
event_listeners_timeout: Maximum time to wait for Actor event listeners to complete before exiting.
|
|
418
|
+
cleanup_timeout: Maximum time to wait for cleanup tasks to finish.
|
|
340
419
|
"""
|
|
341
|
-
self.
|
|
342
|
-
|
|
343
|
-
self.
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
self.log.info('Exiting Actor', extra={'exit_code': exit_code})
|
|
348
|
-
|
|
349
|
-
async def finalize() -> None:
|
|
350
|
-
if status_message is not None:
|
|
351
|
-
await self.set_status_message(status_message, is_terminal=True)
|
|
352
|
-
|
|
353
|
-
# Sleep for a bit so that the listeners have a chance to trigger
|
|
354
|
-
await asyncio.sleep(0.1)
|
|
355
|
-
|
|
356
|
-
if event_listeners_timeout:
|
|
357
|
-
await self.event_manager.wait_for_all_listeners_to_complete(timeout=event_listeners_timeout)
|
|
358
|
-
|
|
359
|
-
await self.event_manager.__aexit__(None, None, None)
|
|
360
|
-
await self._charging_manager_implementation.__aexit__(None, None, None)
|
|
361
|
-
|
|
362
|
-
await asyncio.wait_for(finalize(), cleanup_timeout.total_seconds())
|
|
363
|
-
self._is_initialized = False
|
|
364
|
-
|
|
365
|
-
if self._exit_process:
|
|
366
|
-
sys.exit(exit_code)
|
|
420
|
+
self.exit_code = exit_code
|
|
421
|
+
self.status_message = status_message
|
|
422
|
+
self._event_listeners_timeout = event_listeners_timeout
|
|
423
|
+
self._cleanup_timeout = cleanup_timeout
|
|
424
|
+
await self.__aexit__(None, None, None)
|
|
367
425
|
|
|
368
426
|
async def fail(
|
|
369
427
|
self,
|
|
@@ -372,23 +430,24 @@ class _ActorType:
|
|
|
372
430
|
exception: BaseException | None = None,
|
|
373
431
|
status_message: str | None = None,
|
|
374
432
|
) -> None:
|
|
375
|
-
"""Fail the Actor instance.
|
|
433
|
+
"""Fail the Actor instance without using context-manager syntax.
|
|
376
434
|
|
|
377
|
-
|
|
435
|
+
Equivalent to setting the `self.exit_code` and `self.status_message` properties and using
|
|
436
|
+
`await Actor.__aexit__()`.
|
|
378
437
|
|
|
379
438
|
Args:
|
|
380
439
|
exit_code: The exit code with which the Actor should fail (defaults to `1`).
|
|
381
440
|
exception: The exception with which the Actor failed.
|
|
382
441
|
status_message: The final status message that the Actor should display.
|
|
383
442
|
"""
|
|
384
|
-
self.
|
|
385
|
-
|
|
386
|
-
# In IPython, we don't run `sys.exit()` during Actor exits,
|
|
387
|
-
# so the exception traceback will be printed on its own
|
|
388
|
-
if exception and not is_running_in_ipython():
|
|
389
|
-
self.log.exception('Actor failed with an exception', exc_info=exception)
|
|
443
|
+
self.exit_code = exit_code
|
|
444
|
+
self.status_message = status_message
|
|
390
445
|
|
|
391
|
-
await self.
|
|
446
|
+
await self.__aexit__(
|
|
447
|
+
exc_type=type(exception) if exception else None,
|
|
448
|
+
exc_value=exception,
|
|
449
|
+
exc_traceback=exception.__traceback__ if exception else None,
|
|
450
|
+
)
|
|
392
451
|
|
|
393
452
|
def new_client(
|
|
394
453
|
self,
|
|
@@ -626,10 +685,6 @@ class _ActorType:
|
|
|
626
685
|
self._raise_if_not_initialized()
|
|
627
686
|
return self._charging_manager_implementation
|
|
628
687
|
|
|
629
|
-
@cached_property
|
|
630
|
-
def _charging_manager_implementation(self) -> ChargingManagerImplementation:
|
|
631
|
-
return ChargingManagerImplementation(self.configuration, self.apify_client)
|
|
632
|
-
|
|
633
688
|
async def charge(self, event_name: str, count: int = 1) -> ChargeResult:
|
|
634
689
|
"""Charge for a specified number of events - sub-operations of the Actor.
|
|
635
690
|
|
|
@@ -822,18 +877,6 @@ class _ActorType:
|
|
|
822
877
|
|
|
823
878
|
return ActorRun.model_validate(api_result)
|
|
824
879
|
|
|
825
|
-
def _get_remaining_time(self) -> timedelta | None:
|
|
826
|
-
"""Get time remaining from the Actor timeout. Returns `None` if not on an Apify platform."""
|
|
827
|
-
if self.is_at_home() and self.configuration.timeout_at:
|
|
828
|
-
return self.configuration.timeout_at - datetime.now(tz=timezone.utc)
|
|
829
|
-
|
|
830
|
-
self.log.warning(
|
|
831
|
-
'Returning `None` instead of remaining time. Using `RemainingTime` argument is only possible when the Actor'
|
|
832
|
-
' is running on the Apify platform and when the timeout for the Actor run is set. '
|
|
833
|
-
f'{self.is_at_home()=}, {self.configuration.timeout_at=}'
|
|
834
|
-
)
|
|
835
|
-
return None
|
|
836
|
-
|
|
837
880
|
async def abort(
|
|
838
881
|
self,
|
|
839
882
|
run_id: str,
|
|
@@ -1242,6 +1285,10 @@ class _ActorType:
|
|
|
1242
1285
|
|
|
1243
1286
|
return proxy_configuration
|
|
1244
1287
|
|
|
1288
|
+
def _raise_if_not_initialized(self) -> None:
|
|
1289
|
+
if not self._is_initialized:
|
|
1290
|
+
raise RuntimeError('The Actor was not initialized!')
|
|
1291
|
+
|
|
1245
1292
|
def _get_default_exit_process(self) -> bool:
|
|
1246
1293
|
"""Return False for IPython, Pytest, and Scrapy environments, True otherwise."""
|
|
1247
1294
|
if is_running_in_ipython():
|
|
@@ -1262,6 +1309,18 @@ class _ActorType:
|
|
|
1262
1309
|
|
|
1263
1310
|
return True
|
|
1264
1311
|
|
|
1312
|
+
def _get_remaining_time(self) -> timedelta | None:
|
|
1313
|
+
"""Get time remaining from the Actor timeout. Returns `None` if not on an Apify platform."""
|
|
1314
|
+
if self.is_at_home() and self.configuration.timeout_at:
|
|
1315
|
+
return self.configuration.timeout_at - datetime.now(tz=timezone.utc)
|
|
1316
|
+
|
|
1317
|
+
self.log.warning(
|
|
1318
|
+
'Returning `None` instead of remaining time. Using `RemainingTime` argument is only possible when the Actor'
|
|
1319
|
+
' is running on the Apify platform and when the timeout for the Actor run is set. '
|
|
1320
|
+
f'{self.is_at_home()=}, {self.configuration.timeout_at=}'
|
|
1321
|
+
)
|
|
1322
|
+
return None
|
|
1323
|
+
|
|
1265
1324
|
|
|
1266
1325
|
Actor = cast('_ActorType', Proxy(_ActorType))
|
|
1267
1326
|
"""The entry point of the SDK, through which all the Actor operations should be done."""
|
apify/_configuration.py
CHANGED
|
@@ -3,6 +3,7 @@ from __future__ import annotations
|
|
|
3
3
|
from datetime import datetime, timedelta
|
|
4
4
|
from decimal import Decimal
|
|
5
5
|
from logging import getLogger
|
|
6
|
+
from pathlib import Path
|
|
6
7
|
from typing import Annotated, Any
|
|
7
8
|
|
|
8
9
|
from pydantic import AliasChoices, BeforeValidator, Field, model_validator
|
|
@@ -421,6 +422,14 @@ class Configuration(CrawleeConfiguration):
|
|
|
421
422
|
logger.warning('Actor is running on the Apify platform, `disable_browser_sandbox` was changed to True.')
|
|
422
423
|
return self
|
|
423
424
|
|
|
425
|
+
@property
|
|
426
|
+
def canonical_input_key(self) -> str:
|
|
427
|
+
return str(Path(self.input_key).with_suffix('.json'))
|
|
428
|
+
|
|
429
|
+
@property
|
|
430
|
+
def input_key_candidates(self) -> set[str]:
|
|
431
|
+
return {self.input_key, self.canonical_input_key, Path(self.canonical_input_key).stem}
|
|
432
|
+
|
|
424
433
|
@classmethod
|
|
425
434
|
def get_global_configuration(cls) -> Configuration:
|
|
426
435
|
"""Retrieve the global instance of the configuration.
|
apify/scrapy/middlewares/apify_proxy.py
CHANGED
|
@@ -49,7 +49,7 @@ class ApifyHttpProxyMiddleware:
|
|
|
49
49
|
if proxy_settings is None:
|
|
50
50
|
Actor.log.info(
|
|
51
51
|
'ApifyHttpProxyMiddleware is not going to be used. Object "proxyConfiguration" is probably missing '
|
|
52
|
-
'
|
|
52
|
+
'in the Actor input.'
|
|
53
53
|
)
|
|
54
54
|
raise NotConfigured
|
|
55
55
|
|
apify/storage_clients/_apify/_request_queue_client.py
CHANGED
|
@@ -12,8 +12,8 @@ from crawlee.storage_clients.models import AddRequestsResponse, ProcessedRequest
|
|
|
12
12
|
from crawlee.storages import RequestQueue
|
|
13
13
|
|
|
14
14
|
from ._models import ApifyRequestQueueMetadata, RequestQueueStats
|
|
15
|
-
from ._request_queue_shared_client import
|
|
16
|
-
from ._request_queue_single_client import
|
|
15
|
+
from ._request_queue_shared_client import ApifyRequestQueueSharedClient
|
|
16
|
+
from ._request_queue_single_client import ApifyRequestQueueSingleClient
|
|
17
17
|
from ._utils import AliasResolver
|
|
18
18
|
|
|
19
19
|
if TYPE_CHECKING:
|
|
@@ -47,14 +47,14 @@ class ApifyRequestQueueClient(RequestQueueClient):
|
|
|
47
47
|
self._api_client = api_client
|
|
48
48
|
"""The Apify request queue client for API operations."""
|
|
49
49
|
|
|
50
|
-
self._implementation:
|
|
50
|
+
self._implementation: ApifyRequestQueueSingleClient | ApifyRequestQueueSharedClient
|
|
51
51
|
"""Internal implementation used to communicate with the Apify platform based Request Queue."""
|
|
52
52
|
if access == 'single':
|
|
53
|
-
self._implementation =
|
|
53
|
+
self._implementation = ApifyRequestQueueSingleClient(
|
|
54
54
|
api_client=self._api_client, metadata=metadata, cache_size=self._MAX_CACHED_REQUESTS
|
|
55
55
|
)
|
|
56
56
|
elif access == 'shared':
|
|
57
|
-
self._implementation =
|
|
57
|
+
self._implementation = ApifyRequestQueueSharedClient(
|
|
58
58
|
api_client=self._api_client,
|
|
59
59
|
metadata=metadata,
|
|
60
60
|
cache_size=self._MAX_CACHED_REQUESTS,
|
apify/storage_clients/_apify/_request_queue_shared_client.py
CHANGED
|
@@ -23,7 +23,7 @@ if TYPE_CHECKING:
|
|
|
23
23
|
logger = getLogger(__name__)
|
|
24
24
|
|
|
25
25
|
|
|
26
|
-
class
|
|
26
|
+
class ApifyRequestQueueSharedClient:
|
|
27
27
|
"""An Apify platform implementation of the request queue client.
|
|
28
28
|
|
|
29
29
|
This implementation supports multiple producers and multiple consumers scenario.
|
|
@@ -106,23 +106,19 @@ class _ApifyRequestQueueSharedClient:
|
|
|
106
106
|
# It could have been handled by another client in the meantime, so cached information about
|
|
107
107
|
# `request.was_already_handled` is not reliable.
|
|
108
108
|
already_present_requests.append(
|
|
109
|
-
ProcessedRequest
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
'wasAlreadyHandled': request.was_already_handled,
|
|
114
|
-
}
|
|
109
|
+
ProcessedRequest(
|
|
110
|
+
unique_key=request.unique_key,
|
|
111
|
+
was_already_present=True,
|
|
112
|
+
was_already_handled=request.was_already_handled,
|
|
115
113
|
)
|
|
116
114
|
)
|
|
117
115
|
|
|
118
116
|
else:
|
|
119
117
|
# Add new request to the cache.
|
|
120
|
-
processed_request = ProcessedRequest
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
'wasAlreadyHandled': request.was_already_handled,
|
|
125
|
-
}
|
|
118
|
+
processed_request = ProcessedRequest(
|
|
119
|
+
unique_key=request.unique_key,
|
|
120
|
+
was_already_present=True,
|
|
121
|
+
was_already_handled=request.was_already_handled,
|
|
126
122
|
)
|
|
127
123
|
self._cache_request(
|
|
128
124
|
request.unique_key,
|
apify/storage_clients/_apify/_request_queue_single_client.py
CHANGED
|
@@ -21,7 +21,7 @@ if TYPE_CHECKING:
|
|
|
21
21
|
logger = getLogger(__name__)
|
|
22
22
|
|
|
23
23
|
|
|
24
|
-
class
|
|
24
|
+
class ApifyRequestQueueSingleClient:
|
|
25
25
|
"""An Apify platform implementation of the request queue client with limited capability.
|
|
26
26
|
|
|
27
27
|
This client is designed to use as little resources as possible, but has to be used in constrained context.
|
|
@@ -108,23 +108,19 @@ class _ApifyRequestQueueSingleClient:
|
|
|
108
108
|
# Check if request is known to be already handled (it has to be present as well.)
|
|
109
109
|
if request.unique_key in self._requests_already_handled:
|
|
110
110
|
already_present_requests.append(
|
|
111
|
-
ProcessedRequest
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
'wasAlreadyHandled': True,
|
|
116
|
-
}
|
|
111
|
+
ProcessedRequest(
|
|
112
|
+
unique_key=request.unique_key,
|
|
113
|
+
was_already_present=True,
|
|
114
|
+
was_already_handled=True,
|
|
117
115
|
)
|
|
118
116
|
)
|
|
119
117
|
# Check if request is known to be already present, but unhandled
|
|
120
118
|
elif self._requests_cache.get(request.unique_key):
|
|
121
119
|
already_present_requests.append(
|
|
122
|
-
ProcessedRequest
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
'wasAlreadyHandled': request.was_already_handled,
|
|
127
|
-
}
|
|
120
|
+
ProcessedRequest(
|
|
121
|
+
unique_key=request.unique_key,
|
|
122
|
+
was_already_present=True,
|
|
123
|
+
was_already_handled=request.was_already_handled,
|
|
128
124
|
)
|
|
129
125
|
)
|
|
130
126
|
else:
|
|
@@ -158,8 +154,9 @@ class _ApifyRequestQueueSingleClient:
|
|
|
158
154
|
self._requests_cache.pop(unprocessed_request.unique_key, None)
|
|
159
155
|
|
|
160
156
|
else:
|
|
161
|
-
api_response = AddRequestsResponse
|
|
162
|
-
|
|
157
|
+
api_response = AddRequestsResponse(
|
|
158
|
+
unprocessed_requests=[],
|
|
159
|
+
processed_requests=already_present_requests,
|
|
163
160
|
)
|
|
164
161
|
|
|
165
162
|
# Update assumed total count for newly added requests.
|
|
@@ -236,20 +233,39 @@ class _ApifyRequestQueueSingleClient:
|
|
|
236
233
|
|
|
237
234
|
# Update the cached data
|
|
238
235
|
for request_data in response.get('items', []):
|
|
236
|
+
# Due to https://github.com/apify/apify-core/blob/v0.1377.0/src/api/src/lib/request_queues/request_queue.ts#L53,
|
|
237
|
+
# the list_head endpoint may return truncated fields for long requests (e.g., long URLs or unique keys).
|
|
238
|
+
# If truncation is detected, fetch the full request data by its ID from the API.
|
|
239
|
+
# This is a temporary workaround - the caching will be refactored to use request IDs instead of unique keys.
|
|
240
|
+
# See https://github.com/apify/apify-sdk-python/issues/630 for details.
|
|
241
|
+
if '[truncated]' in request_data['uniqueKey'] or '[truncated]' in request_data['url']:
|
|
242
|
+
request_data = await self._api_client.get_request(request_id=request_data['id']) # noqa: PLW2901
|
|
243
|
+
|
|
239
244
|
request = Request.model_validate(request_data)
|
|
240
245
|
|
|
241
246
|
if request.unique_key in self._requests_in_progress:
|
|
242
247
|
# Ignore requests that are already in progress, we will not process them again.
|
|
243
248
|
continue
|
|
249
|
+
|
|
244
250
|
if request.was_already_handled:
|
|
245
251
|
# Do not cache fully handled requests, we do not need them. Just cache their unique_key.
|
|
246
252
|
self._requests_already_handled.add(request.unique_key)
|
|
247
253
|
else:
|
|
248
254
|
# Only fetch the request if we do not know it yet.
|
|
249
255
|
if request.unique_key not in self._requests_cache:
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
256
|
+
request_id = unique_key_to_request_id(request.unique_key)
|
|
257
|
+
|
|
258
|
+
if request_data is not None and request_id != request_data['id']:
|
|
259
|
+
logger.warning(
|
|
260
|
+
f'Request ID mismatch: {request_id} != {request_data["id"]}, '
|
|
261
|
+
'this may cause unexpected behavior.'
|
|
262
|
+
)
|
|
263
|
+
|
|
264
|
+
# See https://github.com/apify/apify-sdk-python/issues/630 for details.
|
|
265
|
+
if '[truncated]' not in request.unique_key:
|
|
266
|
+
request_data = await self._api_client.get_request(request_id=request_id) # noqa: PLW2901
|
|
267
|
+
request = Request.model_validate(request_data)
|
|
268
|
+
|
|
253
269
|
self._requests_cache[request.unique_key] = request
|
|
254
270
|
|
|
255
271
|
# Add new requests to the end of the head, unless already present in head
|
|
@@ -1,14 +1,19 @@
|
|
|
1
1
|
import asyncio
|
|
2
2
|
import json
|
|
3
|
-
|
|
3
|
+
import logging
|
|
4
4
|
|
|
5
|
-
from typing_extensions import override
|
|
5
|
+
from more_itertools import flatten
|
|
6
|
+
from typing_extensions import Self, override
|
|
6
7
|
|
|
7
8
|
from crawlee._consts import METADATA_FILENAME
|
|
9
|
+
from crawlee.configuration import Configuration as CrawleeConfiguration
|
|
8
10
|
from crawlee.storage_clients._file_system import FileSystemKeyValueStoreClient
|
|
11
|
+
from crawlee.storage_clients.models import KeyValueStoreRecord
|
|
9
12
|
|
|
10
13
|
from apify._configuration import Configuration
|
|
11
14
|
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
|
|
12
17
|
|
|
13
18
|
class ApifyFileSystemKeyValueStoreClient(FileSystemKeyValueStoreClient):
|
|
14
19
|
"""Apify-specific implementation of the `FileSystemKeyValueStoreClient`.
|
|
@@ -17,6 +22,22 @@ class ApifyFileSystemKeyValueStoreClient(FileSystemKeyValueStoreClient):
|
|
|
17
22
|
directory, except for the metadata file and the `INPUT.json` file.
|
|
18
23
|
"""
|
|
19
24
|
|
|
25
|
+
@override
|
|
26
|
+
@classmethod
|
|
27
|
+
async def open(
|
|
28
|
+
cls,
|
|
29
|
+
*,
|
|
30
|
+
id: str | None,
|
|
31
|
+
name: str | None,
|
|
32
|
+
alias: str | None,
|
|
33
|
+
configuration: CrawleeConfiguration,
|
|
34
|
+
) -> Self:
|
|
35
|
+
client = await super().open(id=id, name=name, alias=alias, configuration=configuration)
|
|
36
|
+
|
|
37
|
+
await client._sanitize_input_json_files() # noqa: SLF001 - it's okay, this is a factory method
|
|
38
|
+
|
|
39
|
+
return client
|
|
40
|
+
|
|
20
41
|
@override
|
|
21
42
|
async def purge(self) -> None:
|
|
22
43
|
"""Purges the key-value store by deleting all its contents.
|
|
@@ -24,16 +45,16 @@ class ApifyFileSystemKeyValueStoreClient(FileSystemKeyValueStoreClient):
|
|
|
24
45
|
It deletes all files in the key-value store directory, except for the metadata file and
|
|
25
46
|
the `INPUT.json` file. It also updates the metadata to reflect that the store has been purged.
|
|
26
47
|
"""
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
# First try to find the alternative format of the input file and process it if it exists.
|
|
30
|
-
for file_path in self.path_to_kvs.glob('*'):
|
|
31
|
-
if file_path.name == f'{kvs_input_key}.json':
|
|
32
|
-
await self._process_input_json(file_path)
|
|
48
|
+
configuration = Configuration.get_global_configuration()
|
|
33
49
|
|
|
34
50
|
async with self._lock:
|
|
51
|
+
files_to_keep = set(
|
|
52
|
+
flatten([key, f'{key}.{METADATA_FILENAME}'] for key in configuration.input_key_candidates)
|
|
53
|
+
)
|
|
54
|
+
files_to_keep.add(METADATA_FILENAME)
|
|
55
|
+
|
|
35
56
|
for file_path in self.path_to_kvs.glob('*'):
|
|
36
|
-
if file_path.name in
|
|
57
|
+
if file_path.name in files_to_keep:
|
|
37
58
|
continue
|
|
38
59
|
if file_path.is_file():
|
|
39
60
|
await asyncio.to_thread(file_path.unlink, missing_ok=True)
|
|
@@ -43,15 +64,40 @@ class ApifyFileSystemKeyValueStoreClient(FileSystemKeyValueStoreClient):
|
|
|
43
64
|
update_modified_at=True,
|
|
44
65
|
)
|
|
45
66
|
|
|
46
|
-
async def
|
|
47
|
-
"""
|
|
67
|
+
async def _sanitize_input_json_files(self) -> None:
|
|
68
|
+
"""Handle missing metadata for input files."""
|
|
69
|
+
configuration = Configuration.get_global_configuration()
|
|
70
|
+
alternative_keys = configuration.input_key_candidates - {configuration.canonical_input_key}
|
|
48
71
|
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
72
|
+
if (self.path_to_kvs / configuration.canonical_input_key).exists():
|
|
73
|
+
# Refresh metadata to prevent inconsistencies
|
|
74
|
+
input_data = await asyncio.to_thread(
|
|
75
|
+
lambda: json.loads((self.path_to_kvs / configuration.canonical_input_key).read_text())
|
|
76
|
+
)
|
|
77
|
+
await self.set_value(key=configuration.canonical_input_key, value=input_data)
|
|
78
|
+
|
|
79
|
+
for alternative_key in alternative_keys:
|
|
80
|
+
if (alternative_input_file := self.path_to_kvs / alternative_key).exists():
|
|
81
|
+
logger.warning(f'Redundant input file found: {alternative_input_file}')
|
|
82
|
+
else:
|
|
83
|
+
for alternative_key in alternative_keys:
|
|
84
|
+
alternative_input_file = self.path_to_kvs / alternative_key
|
|
85
|
+
|
|
86
|
+
# Only process files that actually exist
|
|
87
|
+
if alternative_input_file.exists():
|
|
88
|
+
# Refresh metadata to prevent inconsistencies
|
|
89
|
+
with alternative_input_file.open() as f:
|
|
90
|
+
input_data = await asyncio.to_thread(lambda: json.load(f))
|
|
91
|
+
await self.set_value(key=alternative_key, value=input_data)
|
|
92
|
+
|
|
93
|
+
@override
|
|
94
|
+
async def get_value(self, *, key: str) -> KeyValueStoreRecord | None:
|
|
95
|
+
configuration = Configuration.get_global_configuration()
|
|
96
|
+
|
|
97
|
+
if key in configuration.input_key_candidates:
|
|
98
|
+
for candidate in configuration.input_key_candidates:
|
|
99
|
+
value = await super().get_value(key=candidate)
|
|
100
|
+
if value is not None:
|
|
101
|
+
return value
|
|
102
|
+
|
|
103
|
+
return await super().get_value(key=key)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: apify
|
|
3
|
-
Version: 3.0.1b1
|
|
3
|
+
Version: 3.0.2
|
|
4
4
|
Summary: Apify SDK for Python
|
|
5
5
|
Project-URL: Apify Homepage, https://apify.com
|
|
6
6
|
Project-URL: Changelog, https://docs.apify.com/sdk/python/docs/changelog
|
|
@@ -228,7 +228,7 @@ Requires-Python: >=3.10
|
|
|
228
228
|
Requires-Dist: apify-client<3.0.0,>=2.0.0
|
|
229
229
|
Requires-Dist: apify-shared<3.0.0,>=2.0.0
|
|
230
230
|
Requires-Dist: cachetools>=5.5.0
|
|
231
|
-
Requires-Dist: crawlee<2.0.0,>=1.0.
|
|
231
|
+
Requires-Dist: crawlee<2.0.0,>=1.0.2
|
|
232
232
|
Requires-Dist: cryptography>=42.0.0
|
|
233
233
|
Requires-Dist: impit>=0.6.1
|
|
234
234
|
Requires-Dist: lazy-object-proxy>=1.11.0
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
apify/__init__.py,sha256=HpgKg2FZWJuSPfDygzJ62psylhw4NN4tKFnoYUIhcd4,838
|
|
2
|
-
apify/_actor.py,sha256=
|
|
2
|
+
apify/_actor.py,sha256=kfrwD8gaeN4NcdNMD_Pj66agNh78jJjwMuNOuwLdo-E,57370
|
|
3
3
|
apify/_charging.py,sha256=KjZ2DnEMS0Tt8ibizmmt0RwBq8FOAsD1z-hKFgdazcY,13143
|
|
4
|
-
apify/_configuration.py,sha256=
|
|
4
|
+
apify/_configuration.py,sha256=7ZHhgRp98kr35zx4k4EB2aImq7Dq1FJjPg7r5bucv_M,14984
|
|
5
5
|
apify/_consts.py,sha256=CjhyEJ4Mi0lcIrzfqz8dN7nPJWGjCeBrrXQy1PZ6zRI,440
|
|
6
6
|
apify/_crypto.py,sha256=tqUs13QkemDtGzvU41pIA2HUEawpDlgzqbwKjm4I8kM,6852
|
|
7
7
|
apify/_models.py,sha256=EzU-inWeJ7T5HNVYEwnYb79W-q4OAPhtrYctfRYzpTE,7848
|
|
@@ -27,7 +27,7 @@ apify/scrapy/utils.py,sha256=Ssfa-P9-g9XYP1suDce6dQ8ta7PfijiPoMl2iplE6Ow,2126
|
|
|
27
27
|
apify/scrapy/extensions/__init__.py,sha256=cVQ8CCtOsJsRP28YKZWSUsi4FBwxI-yPJRNSXPFSa_o,98
|
|
28
28
|
apify/scrapy/extensions/_httpcache.py,sha256=XIS9vFCcUtdSfeKAKnxRnI9dX_GMmX2Od8OPnOaDhQ0,8870
|
|
29
29
|
apify/scrapy/middlewares/__init__.py,sha256=tfW-d3WFWLeNEjL8fTmon6NwgD-OXx1Bw2fBdU-wPy4,114
|
|
30
|
-
apify/scrapy/middlewares/apify_proxy.py,sha256=
|
|
30
|
+
apify/scrapy/middlewares/apify_proxy.py,sha256=EtR0958hGfBZ8MfwOIc_XyfBYPdMSkul-Ew4eYQCalM,5549
|
|
31
31
|
apify/scrapy/middlewares/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
32
32
|
apify/scrapy/pipelines/__init__.py,sha256=GWPeLN_Zwj8vRBWtXW6DaxdB7mvyQ7Jw5Tz1ccgWlZI,119
|
|
33
33
|
apify/scrapy/pipelines/actor_dataset_push.py,sha256=XUUyznQTD-E3wYUUFt2WAOnWhbnRrY0WuedlfYfYhDI,846
|
|
@@ -38,20 +38,20 @@ apify/storage_clients/_apify/__init__.py,sha256=mtbVDsxqWL3kx30elnh0kAn2kZ4s3BBs
|
|
|
38
38
|
apify/storage_clients/_apify/_dataset_client.py,sha256=Bb3UwOaFkyuEY7tuBf8K46R4ZP_b1EaAkDOXOqwSoW8,12498
|
|
39
39
|
apify/storage_clients/_apify/_key_value_store_client.py,sha256=42dARbLX2oeOW7uYYKkDyQbEriMuh55Mxh0SqvkOEGg,10529
|
|
40
40
|
apify/storage_clients/_apify/_models.py,sha256=GEaN7Got1zIg42QPH36obHRWRDVNtzOkRuOWYRf9bFU,4572
|
|
41
|
-
apify/storage_clients/_apify/_request_queue_client.py,sha256=
|
|
42
|
-
apify/storage_clients/_apify/_request_queue_shared_client.py,sha256=
|
|
43
|
-
apify/storage_clients/_apify/_request_queue_single_client.py,sha256=
|
|
41
|
+
apify/storage_clients/_apify/_request_queue_client.py,sha256=PUIVmGQxqFTkRxW9FIFWjT0OeDyAGt-ULlW-rdQDTyc,14194
|
|
42
|
+
apify/storage_clients/_apify/_request_queue_shared_client.py,sha256=uxkuIG1rgCArgs6agldC9vmB2bgrIlNnm1I214Gf6WA,20550
|
|
43
|
+
apify/storage_clients/_apify/_request_queue_single_client.py,sha256=EuORHJnFLC1YAT6ZfQj-ayrfSJNpU4_61r_7uDyvwgA,18092
|
|
44
44
|
apify/storage_clients/_apify/_storage_client.py,sha256=hFl_PuX1UgOydBD6pieZ0u2NWbDmZV-i0qygKdsuHt4,4873
|
|
45
45
|
apify/storage_clients/_apify/_utils.py,sha256=ywXoSM69amRokUZcshbAvQLIcSZq4L-bpYIGyeFxCGQ,7696
|
|
46
46
|
apify/storage_clients/_apify/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
47
47
|
apify/storage_clients/_file_system/__init__.py,sha256=rDbXatXV9wHKPhKTrXDzWnexhTm7sIJQWucMi-P-SD4,130
|
|
48
|
-
apify/storage_clients/_file_system/_key_value_store_client.py,sha256=
|
|
48
|
+
apify/storage_clients/_file_system/_key_value_store_client.py,sha256=gxM3ap67PnY80Rd7P3onPAf2pksYpU0LoAlJdayEMdU,4179
|
|
49
49
|
apify/storage_clients/_file_system/_storage_client.py,sha256=rcwpKYlrWzvlSA2xoxftg-EZAi_iGZ3vOCbu0C5lKDE,1396
|
|
50
50
|
apify/storage_clients/_smart_apify/__init__.py,sha256=614B2AaWY-dx6RQ6mod7VVR8gFh75-_jnq5BeDD7hSc,53
|
|
51
51
|
apify/storage_clients/_smart_apify/_storage_client.py,sha256=GCPmVe_xWAFcO2Cuej4su4i97_d33Q9Ih_Sc5xW2Wa4,4674
|
|
52
52
|
apify/storages/__init__.py,sha256=-9tEYJVabVs_eRVhUehxN58GH0UG8OfuGjGwuDieP2M,122
|
|
53
53
|
apify/storages/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
54
|
-
apify-3.0.1b1.dist-info/METADATA,sha256=
|
|
55
|
-
apify-3.0.1b1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
56
|
-
apify-3.0.1b1.dist-info/licenses/LICENSE,sha256=AsFjHssKjj4LGd2ZCqXn6FBzMqcWdjQre1byPPSypVw,11355
|
|
57
|
-
apify-3.0.1b1.dist-info/RECORD,,
|
|
54
|
+
apify-3.0.2.dist-info/METADATA,sha256=YRyzVDZFQFOevC5s2rezm0kM-0OFXb2HarPNZlRHG9Y,22580
|
|
55
|
+
apify-3.0.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
56
|
+
apify-3.0.2.dist-info/licenses/LICENSE,sha256=AsFjHssKjj4LGd2ZCqXn6FBzMqcWdjQre1byPPSypVw,11355
|
|
57
|
+
apify-3.0.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|