apify 2.7.1b20__py3-none-any.whl → 2.7.1b21__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of apify might be problematic. Click here for more details.

apify/_actor.py CHANGED
@@ -25,7 +25,6 @@ from crawlee.events import (
25
25
  EventPersistStateData,
26
26
  EventSystemInfoData,
27
27
  )
28
- from crawlee.storage_clients import FileSystemStorageClient
29
28
 
30
29
  from apify._charging import ChargeResult, ChargingManager, ChargingManagerImplementation
31
30
  from apify._configuration import Configuration
@@ -38,6 +37,7 @@ from apify.events import ApifyEventManager, EventManager, LocalEventManager
38
37
  from apify.log import _configure_logging, logger
39
38
  from apify.storage_clients import ApifyStorageClient
40
39
  from apify.storage_clients._file_system import ApifyFileSystemStorageClient
40
+ from apify.storage_clients._smart_apify._storage_client import SmartApifyStorageClient
41
41
  from apify.storages import Dataset, KeyValueStore, RequestQueue
42
42
 
43
43
  if TYPE_CHECKING:
@@ -48,7 +48,6 @@ if TYPE_CHECKING:
48
48
  from typing_extensions import Self
49
49
 
50
50
  from crawlee.proxy_configuration import _NewUrlFunction
51
- from crawlee.storage_clients import StorageClient
52
51
 
53
52
  from apify._models import Webhook
54
53
 
@@ -131,7 +130,6 @@ class _ActorType:
131
130
  self._configuration = configuration
132
131
  self._configure_logging = configure_logging
133
132
  self._apify_client: ApifyClientAsync | None = None
134
- self._local_storage_client: StorageClient | None = None
135
133
 
136
134
  self._is_initialized = False
137
135
 
@@ -234,45 +232,42 @@ class _ActorType:
234
232
  """The logging.Logger instance the Actor uses."""
235
233
  return logger
236
234
 
237
- def _get_local_storage_client(self) -> StorageClient:
238
- """Get the local storage client the Actor instance uses."""
239
- if self._local_storage_client:
240
- return self._local_storage_client
235
+ def _raise_if_not_initialized(self) -> None:
236
+ if not self._is_initialized:
237
+ raise RuntimeError('The Actor was not initialized!')
238
+
239
+ @cached_property
240
+ def _storage_client(self) -> SmartApifyStorageClient:
241
+ """Storage client used by the actor.
241
242
 
243
+ Depending on the initialization of the service locator the client can be created in different ways.
244
+ """
242
245
  try:
243
- # Set implicit default local storage client, unless local storage client was already set.
244
- implicit_storage_client = ApifyFileSystemStorageClient()
246
+ # Nothing was set by the user.
247
+ implicit_storage_client = SmartApifyStorageClient(
248
+ local_storage_client=ApifyFileSystemStorageClient(), cloud_storage_client=ApifyStorageClient()
249
+ )
245
250
  service_locator.set_storage_client(implicit_storage_client)
246
- self._local_storage_client = implicit_storage_client
247
251
  except ServiceConflictError:
248
252
  self.log.debug(
249
253
  'Storage client in service locator was set explicitly before Actor.init was called.'
250
254
  'Using the existing storage client as implicit storage client for the Actor.'
251
255
  )
252
-
253
- self._local_storage_client = service_locator.get_storage_client()
254
- if type(self._local_storage_client) is FileSystemStorageClient:
255
- self.log.warning(
256
- f'Using {FileSystemStorageClient.__module__}.{FileSystemStorageClient.__name__} in Actor context is not'
257
- f' recommended and can lead to problems with reading the input file. Use '
258
- f'`apify.storage_clients.FileSystemStorageClient` instead.'
259
- )
260
-
261
- return self._local_storage_client
262
-
263
- def _raise_if_not_initialized(self) -> None:
264
- if not self._is_initialized:
265
- raise RuntimeError('The Actor was not initialized!')
266
-
267
- def _raise_if_cloud_requested_but_not_configured(self, *, force_cloud: bool) -> None:
268
- if not force_cloud:
269
- return
270
-
271
- if not self.is_at_home() and self.configuration.token is None:
272
- raise RuntimeError(
273
- 'In order to use the Apify cloud storage from your computer, '
274
- 'you need to provide an Apify token using the APIFY_TOKEN environment variable.'
275
- )
256
+ else:
257
+ return implicit_storage_client
258
+
259
+ # User set something in the service locator.
260
+ explicit_storage_client = service_locator.get_storage_client()
261
+ if isinstance(explicit_storage_client, SmartApifyStorageClient):
262
+ # The client was manually set to the right type in the service locator. This is the explicit way.
263
+ return explicit_storage_client
264
+
265
+ raise RuntimeError(
266
+ 'The storage client in the service locator has to be instance of SmartApifyStorageClient. If you want to '
267
+ 'set the storage client manually you have to call '
268
+ '`service_locator.set_storage_client(SmartApifyStorageClient(...))` before entering Actor context or '
269
+ 'awaiting `Actor.init`.'
270
+ )
276
271
 
277
272
  async def init(self) -> None:
278
273
  """Initialize the Actor instance.
@@ -285,6 +280,7 @@ class _ActorType:
285
280
  This method should be called immediately before performing any additional Actor actions, and it should be
286
281
  called only once.
287
282
  """
283
+ self.log.info('Initializing Actor...')
288
284
  if self._configuration:
289
285
  # Set explicitly the configuration in the service locator
290
286
  service_locator.set_configuration(self.configuration)
@@ -298,22 +294,13 @@ class _ActorType:
298
294
  if _ActorType._is_any_instance_initialized:
299
295
  self.log.warning('Repeated Actor initialization detected - this is non-standard usage, proceed with care')
300
296
 
301
- # Create an instance of the cloud storage client, the local storage client is obtained
302
- # from the service locator
303
- self._cloud_storage_client = ApifyStorageClient()
304
-
305
297
  # Make sure that the currently initialized instance is also available through the global `Actor` proxy
306
298
  cast('Proxy', Actor).__wrapped__ = self
307
299
 
308
300
  self._is_exiting = False
309
301
  self._was_final_persist_state_emitted = False
310
302
 
311
- # If the Actor is running on the Apify platform, we set the cloud storage client.
312
- if self.is_at_home():
313
- service_locator.set_storage_client(self._cloud_storage_client)
314
- self._local_storage_client = self._cloud_storage_client
315
- else:
316
- self._get_local_storage_client()
303
+ self.log.debug(f'Storage client set to {self._storage_client}')
317
304
 
318
305
  service_locator.set_event_manager(self.event_manager)
319
306
 
@@ -321,7 +308,6 @@ class _ActorType:
321
308
  if self._configure_logging:
322
309
  _configure_logging()
323
310
 
324
- self.log.info('Initializing Actor...')
325
311
  self.log.info('System info', extra=get_system_info())
326
312
 
327
313
  await self.event_manager.__aenter__()
@@ -470,16 +456,11 @@ class _ActorType:
470
456
  An instance of the `Dataset` class for the given ID or name.
471
457
  """
472
458
  self._raise_if_not_initialized()
473
- self._raise_if_cloud_requested_but_not_configured(force_cloud=force_cloud)
474
-
475
- storage_client = self._cloud_storage_client if force_cloud else self._get_local_storage_client()
476
-
477
459
  return await Dataset.open(
478
460
  id=id,
479
- alias=alias,
480
461
  name=name,
481
- configuration=self.configuration,
482
- storage_client=storage_client,
462
+ alias=alias,
463
+ storage_client=self._storage_client.get_suitable_storage_client(force_cloud=force_cloud),
483
464
  )
484
465
 
485
466
  async def open_key_value_store(
@@ -509,16 +490,11 @@ class _ActorType:
509
490
  An instance of the `KeyValueStore` class for the given ID or name.
510
491
  """
511
492
  self._raise_if_not_initialized()
512
- self._raise_if_cloud_requested_but_not_configured(force_cloud=force_cloud)
513
-
514
- storage_client = self._cloud_storage_client if force_cloud else self._get_local_storage_client()
515
-
516
493
  return await KeyValueStore.open(
517
494
  id=id,
518
- alias=alias,
519
495
  name=name,
520
- configuration=self.configuration,
521
- storage_client=storage_client,
496
+ alias=alias,
497
+ storage_client=self._storage_client.get_suitable_storage_client(force_cloud=force_cloud),
522
498
  )
523
499
 
524
500
  async def open_request_queue(
@@ -550,16 +526,11 @@ class _ActorType:
550
526
  An instance of the `RequestQueue` class for the given ID or name.
551
527
  """
552
528
  self._raise_if_not_initialized()
553
- self._raise_if_cloud_requested_but_not_configured(force_cloud=force_cloud)
554
-
555
- storage_client = self._cloud_storage_client if force_cloud else self._get_local_storage_client()
556
-
557
529
  return await RequestQueue.open(
558
530
  id=id,
559
- alias=alias,
560
531
  name=name,
561
- configuration=self.configuration,
562
- storage_client=storage_client,
532
+ alias=alias,
533
+ storage_client=self._storage_client.get_suitable_storage_client(force_cloud=force_cloud),
563
534
  )
564
535
 
565
536
  @overload
@@ -2,9 +2,11 @@ from crawlee.storage_clients import MemoryStorageClient
2
2
 
3
3
  from ._apify import ApifyStorageClient
4
4
  from ._file_system import ApifyFileSystemStorageClient as FileSystemStorageClient
5
+ from ._smart_apify import SmartApifyStorageClient
5
6
 
6
7
  __all__ = [
7
8
  'ApifyStorageClient',
8
9
  'FileSystemStorageClient',
9
10
  'MemoryStorageClient',
11
+ 'SmartApifyStorageClient',
10
12
  ]