apify 2.7.1b19__py3-none-any.whl → 2.7.1b21__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of apify might be problematic. Click here for more details.

apify/_actor.py CHANGED
@@ -25,7 +25,6 @@ from crawlee.events import (
25
25
  EventPersistStateData,
26
26
  EventSystemInfoData,
27
27
  )
28
- from crawlee.storage_clients import FileSystemStorageClient
29
28
 
30
29
  from apify._charging import ChargeResult, ChargingManager, ChargingManagerImplementation
31
30
  from apify._configuration import Configuration
@@ -38,6 +37,7 @@ from apify.events import ApifyEventManager, EventManager, LocalEventManager
38
37
  from apify.log import _configure_logging, logger
39
38
  from apify.storage_clients import ApifyStorageClient
40
39
  from apify.storage_clients._file_system import ApifyFileSystemStorageClient
40
+ from apify.storage_clients._smart_apify._storage_client import SmartApifyStorageClient
41
41
  from apify.storages import Dataset, KeyValueStore, RequestQueue
42
42
 
43
43
  if TYPE_CHECKING:
@@ -48,7 +48,6 @@ if TYPE_CHECKING:
48
48
  from typing_extensions import Self
49
49
 
50
50
  from crawlee.proxy_configuration import _NewUrlFunction
51
- from crawlee.storage_clients import StorageClient
52
51
 
53
52
  from apify._models import Webhook
54
53
 
@@ -131,7 +130,6 @@ class _ActorType:
131
130
  self._configuration = configuration
132
131
  self._configure_logging = configure_logging
133
132
  self._apify_client: ApifyClientAsync | None = None
134
- self._local_storage_client: StorageClient | None = None
135
133
 
136
134
  self._is_initialized = False
137
135
 
@@ -234,45 +232,42 @@ class _ActorType:
234
232
  """The logging.Logger instance the Actor uses."""
235
233
  return logger
236
234
 
237
- def _get_local_storage_client(self) -> StorageClient:
238
- """Get the local storage client the Actor instance uses."""
239
- if self._local_storage_client:
240
- return self._local_storage_client
235
+ def _raise_if_not_initialized(self) -> None:
236
+ if not self._is_initialized:
237
+ raise RuntimeError('The Actor was not initialized!')
238
+
239
+ @cached_property
240
+ def _storage_client(self) -> SmartApifyStorageClient:
241
+ """Storage client used by the actor.
241
242
 
243
+ Depending on the initialization of the service locator the client can be created in different ways.
244
+ """
242
245
  try:
243
- # Set implicit default local storage client, unless local storage client was already set.
244
- implicit_storage_client = ApifyFileSystemStorageClient()
246
+ # Nothing was set by the user.
247
+ implicit_storage_client = SmartApifyStorageClient(
248
+ local_storage_client=ApifyFileSystemStorageClient(), cloud_storage_client=ApifyStorageClient()
249
+ )
245
250
  service_locator.set_storage_client(implicit_storage_client)
246
- self._local_storage_client = implicit_storage_client
247
251
  except ServiceConflictError:
248
252
  self.log.debug(
249
253
  'Storage client in service locator was set explicitly before Actor.init was called.'
250
254
  'Using the existing storage client as implicit storage client for the Actor.'
251
255
  )
252
-
253
- self._local_storage_client = service_locator.get_storage_client()
254
- if type(self._local_storage_client) is FileSystemStorageClient:
255
- self.log.warning(
256
- f'Using {FileSystemStorageClient.__module__}.{FileSystemStorageClient.__name__} in Actor context is not'
257
- f' recommended and can lead to problems with reading the input file. Use '
258
- f'`apify.storage_clients.FileSystemStorageClient` instead.'
259
- )
260
-
261
- return self._local_storage_client
262
-
263
- def _raise_if_not_initialized(self) -> None:
264
- if not self._is_initialized:
265
- raise RuntimeError('The Actor was not initialized!')
266
-
267
- def _raise_if_cloud_requested_but_not_configured(self, *, force_cloud: bool) -> None:
268
- if not force_cloud:
269
- return
270
-
271
- if not self.is_at_home() and self.configuration.token is None:
272
- raise RuntimeError(
273
- 'In order to use the Apify cloud storage from your computer, '
274
- 'you need to provide an Apify token using the APIFY_TOKEN environment variable.'
275
- )
256
+ else:
257
+ return implicit_storage_client
258
+
259
+ # User set something in the service locator.
260
+ explicit_storage_client = service_locator.get_storage_client()
261
+ if isinstance(explicit_storage_client, SmartApifyStorageClient):
262
+ # The client was manually set to the right type in the service locator. This is the explicit way.
263
+ return explicit_storage_client
264
+
265
+ raise RuntimeError(
266
+ 'The storage client in the service locator has to be instance of SmartApifyStorageClient. If you want to '
267
+ 'set the storage client manually you have to call '
268
+ '`service_locator.set_storage_client(SmartApifyStorageClient(...))` before entering Actor context or '
269
+ 'awaiting `Actor.init`.'
270
+ )
276
271
 
277
272
  async def init(self) -> None:
278
273
  """Initialize the Actor instance.
@@ -285,6 +280,7 @@ class _ActorType:
285
280
  This method should be called immediately before performing any additional Actor actions, and it should be
286
281
  called only once.
287
282
  """
283
+ self.log.info('Initializing Actor...')
288
284
  if self._configuration:
289
285
  # Set explicitly the configuration in the service locator
290
286
  service_locator.set_configuration(self.configuration)
@@ -298,22 +294,13 @@ class _ActorType:
298
294
  if _ActorType._is_any_instance_initialized:
299
295
  self.log.warning('Repeated Actor initialization detected - this is non-standard usage, proceed with care')
300
296
 
301
- # Create an instance of the cloud storage client, the local storage client is obtained
302
- # from the service locator
303
- self._cloud_storage_client = ApifyStorageClient()
304
-
305
297
  # Make sure that the currently initialized instance is also available through the global `Actor` proxy
306
298
  cast('Proxy', Actor).__wrapped__ = self
307
299
 
308
300
  self._is_exiting = False
309
301
  self._was_final_persist_state_emitted = False
310
302
 
311
- # If the Actor is running on the Apify platform, we set the cloud storage client.
312
- if self.is_at_home():
313
- service_locator.set_storage_client(self._cloud_storage_client)
314
- self._local_storage_client = self._cloud_storage_client
315
- else:
316
- self._get_local_storage_client()
303
+ self.log.debug(f'Storage client set to {self._storage_client}')
317
304
 
318
305
  service_locator.set_event_manager(self.event_manager)
319
306
 
@@ -321,12 +308,8 @@ class _ActorType:
321
308
  if self._configure_logging:
322
309
  _configure_logging()
323
310
 
324
- self.log.info('Initializing Actor...')
325
311
  self.log.info('System info', extra=get_system_info())
326
312
 
327
- # TODO: Print outdated SDK version warning (we need a new env var for this)
328
- # https://github.com/apify/apify-sdk-python/issues/146
329
-
330
313
  await self.event_manager.__aenter__()
331
314
  self.log.debug('Event manager initialized')
332
315
 
@@ -473,16 +456,11 @@ class _ActorType:
473
456
  An instance of the `Dataset` class for the given ID or name.
474
457
  """
475
458
  self._raise_if_not_initialized()
476
- self._raise_if_cloud_requested_but_not_configured(force_cloud=force_cloud)
477
-
478
- storage_client = self._cloud_storage_client if force_cloud else self._get_local_storage_client()
479
-
480
459
  return await Dataset.open(
481
460
  id=id,
482
- alias=alias,
483
461
  name=name,
484
- configuration=self.configuration,
485
- storage_client=storage_client,
462
+ alias=alias,
463
+ storage_client=self._storage_client.get_suitable_storage_client(force_cloud=force_cloud),
486
464
  )
487
465
 
488
466
  async def open_key_value_store(
@@ -512,16 +490,11 @@ class _ActorType:
512
490
  An instance of the `KeyValueStore` class for the given ID or name.
513
491
  """
514
492
  self._raise_if_not_initialized()
515
- self._raise_if_cloud_requested_but_not_configured(force_cloud=force_cloud)
516
-
517
- storage_client = self._cloud_storage_client if force_cloud else self._get_local_storage_client()
518
-
519
493
  return await KeyValueStore.open(
520
494
  id=id,
521
- alias=alias,
522
495
  name=name,
523
- configuration=self.configuration,
524
- storage_client=storage_client,
496
+ alias=alias,
497
+ storage_client=self._storage_client.get_suitable_storage_client(force_cloud=force_cloud),
525
498
  )
526
499
 
527
500
  async def open_request_queue(
@@ -553,16 +526,11 @@ class _ActorType:
553
526
  An instance of the `RequestQueue` class for the given ID or name.
554
527
  """
555
528
  self._raise_if_not_initialized()
556
- self._raise_if_cloud_requested_but_not_configured(force_cloud=force_cloud)
557
-
558
- storage_client = self._cloud_storage_client if force_cloud else self._get_local_storage_client()
559
-
560
529
  return await RequestQueue.open(
561
530
  id=id,
562
- alias=alias,
563
531
  name=name,
564
- configuration=self.configuration,
565
- storage_client=storage_client,
532
+ alias=alias,
533
+ storage_client=self._storage_client.get_suitable_storage_client(force_cloud=force_cloud),
566
534
  )
567
535
 
568
536
  @overload
apify/_configuration.py CHANGED
@@ -142,7 +142,7 @@ class Configuration(CrawleeConfiguration):
142
142
  ] = None
143
143
 
144
144
  default_dataset_id: Annotated[
145
- str,
145
+ str | None,
146
146
  Field(
147
147
  validation_alias=AliasChoices(
148
148
  'actor_default_dataset_id',
@@ -150,10 +150,10 @@ class Configuration(CrawleeConfiguration):
150
150
  ),
151
151
  description='Default dataset ID used by the Apify storage client when no ID or name is provided.',
152
152
  ),
153
- ] = 'default'
153
+ ] = None
154
154
 
155
155
  default_key_value_store_id: Annotated[
156
- str,
156
+ str | None,
157
157
  Field(
158
158
  validation_alias=AliasChoices(
159
159
  'actor_default_key_value_store_id',
@@ -161,10 +161,10 @@ class Configuration(CrawleeConfiguration):
161
161
  ),
162
162
  description='Default key-value store ID for the Apify storage client when no ID or name is provided.',
163
163
  ),
164
- ] = 'default'
164
+ ] = None
165
165
 
166
166
  default_request_queue_id: Annotated[
167
- str,
167
+ str | None,
168
168
  Field(
169
169
  validation_alias=AliasChoices(
170
170
  'actor_default_request_queue_id',
@@ -172,7 +172,7 @@ class Configuration(CrawleeConfiguration):
172
172
  ),
173
173
  description='Default request queue ID for the Apify storage client when no ID or name is provided.',
174
174
  ),
175
- ] = 'default'
175
+ ] = None
176
176
 
177
177
  disable_outdated_warning: Annotated[
178
178
  bool,
@@ -2,9 +2,11 @@ from crawlee.storage_clients import MemoryStorageClient
2
2
 
3
3
  from ._apify import ApifyStorageClient
4
4
  from ._file_system import ApifyFileSystemStorageClient as FileSystemStorageClient
5
+ from ._smart_apify import SmartApifyStorageClient
5
6
 
6
7
  __all__ = [
7
8
  'ApifyStorageClient',
8
9
  'FileSystemStorageClient',
9
10
  'MemoryStorageClient',
11
+ 'SmartApifyStorageClient',
10
12
  ]
@@ -124,8 +124,10 @@ class ApifyDatasetClient(DatasetClient):
124
124
  )
125
125
  apify_datasets_client = apify_client_async.datasets()
126
126
 
127
- # Normalize 'default' alias to None
128
- alias = None if alias == 'default' else alias
127
+ # Normalize unnamed default storage in cases where not defined in `configuration.default_dataset_id` to unnamed
128
+ # storage aliased as `__default__`
129
+ if not any([alias, name, id, configuration.default_dataset_id]):
130
+ alias = '__default__'
129
131
 
130
132
  if alias:
131
133
  # Check if there is pre-existing alias mapping in the default KVS.
@@ -150,6 +152,11 @@ class ApifyDatasetClient(DatasetClient):
150
152
  # If none are provided, try to get the default storage ID from environment variables.
151
153
  elif id is None:
152
154
  id = configuration.default_dataset_id
155
+ if not id:
156
+ raise ValueError(
157
+ 'Dataset "id", "name", or "alias" must be specified, '
158
+ 'or a default dataset ID must be set in the configuration.'
159
+ )
153
160
 
154
161
  # Now create the client for the determined ID
155
162
  apify_dataset_client = apify_client_async.dataset(dataset_id=id)
@@ -115,8 +115,10 @@ class ApifyKeyValueStoreClient(KeyValueStoreClient):
115
115
  )
116
116
  apify_kvss_client = apify_client_async.key_value_stores()
117
117
 
118
- # Normalize 'default' alias to None
119
- alias = None if alias == 'default' else alias
118
+ # Normalize unnamed default storage in cases where not defined in `configuration.default_key_value_store_id` to
119
+ # unnamed storage aliased as `__default__`
120
+ if not any([alias, name, id, configuration.default_key_value_store_id]):
121
+ alias = '__default__'
120
122
 
121
123
  if alias:
122
124
  # Check if there is pre-existing alias mapping in the default KVS.
@@ -142,6 +144,11 @@ class ApifyKeyValueStoreClient(KeyValueStoreClient):
142
144
  # If none are provided, try to get the default storage ID from environment variables.
143
145
  elif id is None:
144
146
  id = configuration.default_key_value_store_id
147
+ if not id:
148
+ raise ValueError(
149
+ 'KeyValueStore "id", "name", or "alias" must be specified, '
150
+ 'or a default KeyValueStore ID must be set in the configuration.'
151
+ )
145
152
 
146
153
  # Now create the client for the determined ID
147
154
  apify_kvs_client = apify_client_async.key_value_store(key_value_store_id=id)