apify 2.7.1b17__py3-none-any.whl → 2.7.1b19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

apify/_actor.py CHANGED
@@ -5,6 +5,7 @@ import os
  import sys
  from contextlib import suppress
  from datetime import datetime, timedelta, timezone
+ from functools import cached_property
  from typing import TYPE_CHECKING, Any, Literal, TypeVar, cast, overload

  from lazy_object_proxy import Proxy
@@ -14,6 +15,7 @@ from pydantic import AliasChoices
  from apify_client import ApifyClientAsync
  from apify_shared.consts import ActorEnvVars, ActorExitCodes, ApifyEnvVars
  from crawlee import service_locator
+ from crawlee.errors import ServiceConflictError
  from crawlee.events import (
      Event,
      EventAbortingData,
@@ -23,6 +25,7 @@ from crawlee.events import (
      EventPersistStateData,
      EventSystemInfoData,
  )
+ from crawlee.storage_clients import FileSystemStorageClient

  from apify._charging import ChargeResult, ChargingManager, ChargingManagerImplementation
  from apify._configuration import Configuration
@@ -34,6 +37,7 @@ from apify._utils import docs_group, docs_name, get_system_info, is_running_in_i
  from apify.events import ApifyEventManager, EventManager, LocalEventManager
  from apify.log import _configure_logging, logger
  from apify.storage_clients import ApifyStorageClient
+ from apify.storage_clients._file_system import ApifyFileSystemStorageClient
  from apify.storages import Dataset, KeyValueStore, RequestQueue

  if TYPE_CHECKING:
@@ -119,28 +123,15 @@ class _ActorType:
          self._exit_process = self._get_default_exit_process() if exit_process is None else exit_process
          self._is_exiting = False

-         self._configuration = configuration or Configuration.get_global_configuration()
-         self._configure_logging = configure_logging
-         self._apify_client = self.new_client()
+         # The Actor's state while this method executes is unpredictable: the Actor can be initialized
+         # by the lazy object proxy, directly by the user, or by both. Until the `init` method is run,
+         # this state of uncertainty remains, so nothing set here in `__init__` should be considered final.

-         # Create an instance of the cloud storage client, the local storage client is obtained
-         # from the service locator.
-         self._cloud_storage_client = ApifyStorageClient()
-
-         # Set the event manager based on whether the Actor is running on the platform or locally.
-         self._event_manager = (
-             ApifyEventManager(
-                 configuration=self._configuration,
-                 persist_state_interval=self._configuration.persist_state_interval,
-             )
-             if self.is_at_home()
-             else LocalEventManager(
-                 system_info_interval=self._configuration.system_info_interval,
-                 persist_state_interval=self._configuration.persist_state_interval,
-             )
-         )
-
-         self._charging_manager = ChargingManagerImplementation(self._configuration, self._apify_client)
+         self._configuration = configuration
+         self._configure_logging = configure_logging
+         self._apify_client: ApifyClientAsync | None = None
+         self._local_storage_client: StorageClient | None = None

          self._is_initialized = False
@@ -198,32 +189,76 @@ class _ActorType:
      @property
      def apify_client(self) -> ApifyClientAsync:
          """The ApifyClientAsync instance the Actor instance uses."""
+         if not self._apify_client:
+             self._apify_client = self.new_client()
          return self._apify_client

-     @property
+     @cached_property
      def configuration(self) -> Configuration:
          """The Configuration instance the Actor instance uses."""
-         return self._configuration
+         if self._configuration:
+             return self._configuration
+
+         try:
+             # Set the implicit default Apify configuration, unless a configuration was already set.
+             implicit_configuration = Configuration()
+             service_locator.set_configuration(implicit_configuration)
+             self._configuration = implicit_configuration
+         except ServiceConflictError:
+             self.log.debug(
+                 'Configuration in service locator was set explicitly before Actor.init was called. '
+                 'Using the existing configuration as implicit configuration for the Actor.'
+             )

-     @property
-     def config(self) -> Configuration:
-         """The Configuration instance the Actor instance uses."""
+             # Use the configuration from the service locator.
+             self._configuration = Configuration.get_global_configuration()
          return self._configuration

-     @property
+     @cached_property
      def event_manager(self) -> EventManager:
          """The EventManager instance the Actor instance uses."""
-         return self._event_manager
+         return (
+             ApifyEventManager(
+                 configuration=self.configuration,
+                 persist_state_interval=self.configuration.persist_state_interval,
+             )
+             if self.is_at_home()
+             else LocalEventManager(
+                 system_info_interval=self.configuration.system_info_interval,
+                 persist_state_interval=self.configuration.persist_state_interval,
+             )
+         )

      @property
      def log(self) -> logging.Logger:
          """The logging.Logger instance the Actor uses."""
          return logger

-     @property
-     def _local_storage_client(self) -> StorageClient:
-         """The local storage client the Actor instance uses."""
-         return service_locator.get_storage_client()
+     def _get_local_storage_client(self) -> StorageClient:
+         """Get the local storage client the Actor instance uses."""
+         if self._local_storage_client:
+             return self._local_storage_client
+
+         try:
+             # Set the implicit default local storage client, unless a local storage client was already set.
+             implicit_storage_client = ApifyFileSystemStorageClient()
+             service_locator.set_storage_client(implicit_storage_client)
+             self._local_storage_client = implicit_storage_client
+         except ServiceConflictError:
+             self.log.debug(
+                 'Storage client in service locator was set explicitly before Actor.init was called. '
+                 'Using the existing storage client as implicit storage client for the Actor.'
+             )
+
+             self._local_storage_client = service_locator.get_storage_client()
+             if type(self._local_storage_client) is FileSystemStorageClient:
+                 self.log.warning(
+                     f'Using {FileSystemStorageClient.__module__}.{FileSystemStorageClient.__name__} in Actor context is not'
+                     f' recommended and can lead to problems with reading the input file. Use '
+                     f'`apify.storage_clients.FileSystemStorageClient` instead.'
+                 )
+
+         return self._local_storage_client

      def _raise_if_not_initialized(self) -> None:
          if not self._is_initialized:
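Taken together, these lazy resolvers mean that services registered in crawlee's `service_locator` before `Actor.init` now win over the SDK's implicit defaults (`apify.Configuration`, `ApifyFileSystemStorageClient`): the Actor only fills a slot when it is still empty, and a `ServiceConflictError` signals that the user got there first. A minimal usage sketch of this behavior (assumes a valid `APIFY_TOKEN` in the environment, since `ApifyStorageClient` talks to the platform API):

```python
import asyncio

from crawlee import service_locator

from apify import Actor
from apify.storage_clients import ApifyStorageClient


async def main() -> None:
    # Registered before Actor.init() runs, so the Actor's lazy resolvers hit
    # ServiceConflictError for this slot and keep the explicit client.
    service_locator.set_storage_client(ApifyStorageClient())

    async with Actor:
        dataset = await Actor.open_dataset()
        await dataset.push_data({'url': 'https://example.com'})


if __name__ == '__main__':
    asyncio.run(main())
```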
@@ -233,7 +268,7 @@ class _ActorType:
          if not force_cloud:
              return

-         if not self.is_at_home() and self.config.token is None:
+         if not self.is_at_home() and self.configuration.token is None:
              raise RuntimeError(
                  'In order to use the Apify cloud storage from your computer, '
                  'you need to provide an Apify token using the APIFY_TOKEN environment variable.'
@@ -250,12 +285,23 @@ class _ActorType:
          This method should be called immediately before performing any additional Actor actions, and it should be
          called only once.
          """
+         if self._configuration:
+             # Set the configuration in the service locator explicitly.
+             service_locator.set_configuration(self.configuration)
+         else:
+             # Ensure that the configuration (a cached property) is set.
+             _ = self.configuration
+
          if self._is_initialized:
              raise RuntimeError('The Actor was already initialized!')

          if _ActorType._is_any_instance_initialized:
              self.log.warning('Repeated Actor initialization detected - this is non-standard usage, proceed with care')

+         # Create an instance of the cloud storage client; the local storage client is obtained
+         # from the service locator.
+         self._cloud_storage_client = ApifyStorageClient()
+
          # Make sure that the currently initialized instance is also available through the global `Actor` proxy
          cast('Proxy', Actor).__wrapped__ = self

@@ -265,9 +311,11 @@
          # If the Actor is running on the Apify platform, we set the cloud storage client.
          if self.is_at_home():
              service_locator.set_storage_client(self._cloud_storage_client)
+             self._local_storage_client = self._cloud_storage_client
+         else:
+             self._get_local_storage_client()

          service_locator.set_event_manager(self.event_manager)
-         service_locator.set_configuration(self.configuration)

          # The logging configuration has to be called after all service_locator set methods.
          if self._configure_logging:
@@ -279,10 +327,10 @@ class _ActorType:
          # TODO: Print outdated SDK version warning (we need a new env var for this)
          # https://github.com/apify/apify-sdk-python/issues/146

-         await self._event_manager.__aenter__()
+         await self.event_manager.__aenter__()
          self.log.debug('Event manager initialized')

-         await self._charging_manager.__aenter__()
+         await self._charging_manager_implementation.__aenter__()
          self.log.debug('Charging manager initialized')

          self._is_initialized = True
@@ -323,10 +371,10 @@ class _ActorType:
                  await asyncio.sleep(0.1)

              if event_listeners_timeout:
                  await self._event_manager.wait_for_all_listeners_to_complete(timeout=event_listeners_timeout)
-                 await self._event_manager.wait_for_all_listeners_to_complete(timeout=event_listeners_timeout)
+                 await self.event_manager.wait_for_all_listeners_to_complete(timeout=event_listeners_timeout)

-             await self._event_manager.__aexit__(None, None, None)
-             await self._charging_manager.__aexit__(None, None, None)
+             await self.event_manager.__aexit__(None, None, None)
+             await self._charging_manager_implementation.__aexit__(None, None, None)

          await asyncio.wait_for(finalize(), cleanup_timeout.total_seconds())
          self._is_initialized = False
@@ -385,8 +433,8 @@ class _ActorType:
              (increases exponentially from this value).
              timeout: The socket timeout of the HTTP requests sent to the Apify API.
          """
-         token = token or self._configuration.token
-         api_url = api_url or self._configuration.api_base_url
+         token = token or self.configuration.token
+         api_url = api_url or self.configuration.api_base_url
          return ApifyClientAsync(
              token=token,
              api_url=api_url,
@@ -427,13 +475,13 @@ class _ActorType:
          self._raise_if_not_initialized()
          self._raise_if_cloud_requested_but_not_configured(force_cloud=force_cloud)

-         storage_client = self._cloud_storage_client if force_cloud else self._local_storage_client
+         storage_client = self._cloud_storage_client if force_cloud else self._get_local_storage_client()

          return await Dataset.open(
              id=id,
              alias=alias,
              name=name,
-             configuration=self._configuration,
+             configuration=self.configuration,
              storage_client=storage_client,
          )

@@ -465,13 +513,14 @@ class _ActorType:
          """
          self._raise_if_not_initialized()
          self._raise_if_cloud_requested_but_not_configured(force_cloud=force_cloud)
-         storage_client = self._cloud_storage_client if force_cloud else self._local_storage_client
+
+         storage_client = self._cloud_storage_client if force_cloud else self._get_local_storage_client()

          return await KeyValueStore.open(
              id=id,
              alias=alias,
              name=name,
-             configuration=self._configuration,
+             configuration=self.configuration,
              storage_client=storage_client,
          )

@@ -506,13 +555,13 @@ class _ActorType:
          self._raise_if_not_initialized()
          self._raise_if_cloud_requested_but_not_configured(force_cloud=force_cloud)

-         storage_client = self._cloud_storage_client if force_cloud else self._local_storage_client
+         storage_client = self._cloud_storage_client if force_cloud else self._get_local_storage_client()

          return await RequestQueue.open(
              id=id,
              alias=alias,
              name=name,
-             configuration=self._configuration,
+             configuration=self.configuration,
              storage_client=storage_client,
          )

@@ -536,7 +585,7 @@ class _ActorType:
          data = data if isinstance(data, list) else [data]

          max_charged_count = (
-             self._charging_manager.calculate_max_event_charge_count_within_limit(charged_event_name)
+             self.get_charging_manager().calculate_max_event_charge_count_within_limit(charged_event_name)
              if charged_event_name is not None
              else None
          )
@@ -550,7 +599,7 @@ class _ActorType:
          await dataset.push_data(data)

          if charged_event_name:
-             return await self._charging_manager.charge(
+             return await self.get_charging_manager().charge(
                  event_name=charged_event_name,
                  count=min(max_charged_count, len(data)) if max_charged_count is not None else len(data),
              )
@@ -561,9 +610,9 @@ class _ActorType:
          """Get the Actor input value from the default key-value store associated with the current Actor run."""
          self._raise_if_not_initialized()

-         input_value = await self.get_value(self._configuration.input_key)
-         input_secrets_private_key = self._configuration.input_secrets_private_key_file
-         input_secrets_key_passphrase = self._configuration.input_secrets_private_key_passphrase
+         input_value = await self.get_value(self.configuration.input_key)
+         input_secrets_private_key = self.configuration.input_secrets_private_key_file
+         input_secrets_key_passphrase = self.configuration.input_secrets_private_key_passphrase
          if input_secrets_private_key and input_secrets_key_passphrase:
              private_key = load_private_key(
                  input_secrets_private_key,
@@ -607,7 +656,11 @@ class _ActorType:
      def get_charging_manager(self) -> ChargingManager:
          """Retrieve the charging manager to access granular pricing information."""
          self._raise_if_not_initialized()
-         return self._charging_manager
+         return self._charging_manager_implementation
+
+     @cached_property
+     def _charging_manager_implementation(self) -> ChargingManagerImplementation:
+         return ChargingManagerImplementation(self.configuration, self.apify_client)

      async def charge(self, event_name: str, count: int = 1) -> ChargeResult:
          """Charge for a specified number of events - sub-operations of the Actor.
@@ -619,7 +672,7 @@ class _ActorType:
              count: Number of events to charge for.
          """
          self._raise_if_not_initialized()
-         return await self._charging_manager.charge(event_name, count)
+         return await self.get_charging_manager().charge(event_name, count)

      @overload
      def on(
@@ -670,7 +723,7 @@ class _ActorType:
          """
          self._raise_if_not_initialized()

-         self._event_manager.on(event=event_name, listener=listener)
+         self.event_manager.on(event=event_name, listener=listener)
          return listener

      @overload
@@ -696,11 +749,11 @@ class _ActorType:
          """
          self._raise_if_not_initialized()

-         self._event_manager.off(event=event_name, listener=listener)
+         self.event_manager.off(event=event_name, listener=listener)

      def is_at_home(self) -> bool:
          """Return `True` when the Actor is running on the Apify platform, and `False` otherwise (e.g. local run)."""
-         return self._configuration.is_at_home
+         return self.configuration.is_at_home

      def get_env(self) -> dict:
          """Return a dictionary with information parsed from all the `APIFY_XXX` environment variables.
@@ -726,7 +779,7 @@ class _ActorType:
              aliases = [field_name]

              for alias in aliases:
-                 config[alias] = getattr(self._configuration, field_name)
+                 config[alias] = getattr(self.configuration, field_name)

          env_vars = {env_var.value.lower(): env_var.name.lower() for env_var in [*ActorEnvVars, *ApifyEnvVars]}
          return {option_name: config[env_var] for env_var, option_name in env_vars.items() if env_var in config}
@@ -771,7 +824,7 @@ class _ActorType:
          """
          self._raise_if_not_initialized()

-         client = self.new_client(token=token) if token else self._apify_client
+         client = self.new_client(token=token) if token else self.apify_client

          if webhooks:
              serialized_webhooks = [
@@ -802,7 +855,7 @@ class _ActorType:
          return ActorRun.model_validate(api_result)

      def _get_remaining_time(self) -> timedelta | None:
-         """Get time remaining from the actor timeout. Returns `None` if not on an Apify platform."""
+         """Get time remaining from the Actor timeout. Returns `None` if not on an Apify platform."""
          if self.is_at_home() and self.configuration.timeout_at:
              return self.configuration.timeout_at - datetime.now(tz=timezone.utc)
@@ -838,7 +891,7 @@ class _ActorType:
          """
          self._raise_if_not_initialized()

-         client = self.new_client(token=token) if token else self._apify_client
+         client = self.new_client(token=token) if token else self.apify_client

          if status_message:
              await client.run(run_id).update(status_message=status_message)
@@ -891,7 +944,7 @@ class _ActorType:
          """
          self._raise_if_not_initialized()

-         client = self.new_client(token=token) if token else self._apify_client
+         client = self.new_client(token=token) if token else self.apify_client

          if webhooks:
              serialized_webhooks = [
@@ -963,7 +1016,7 @@ class _ActorType:
          """
          self._raise_if_not_initialized()

-         client = self.new_client(token=token) if token else self._apify_client
+         client = self.new_client(token=token) if token else self.apify_client

          if webhooks:
              serialized_webhooks = [
@@ -1014,13 +1067,13 @@ class _ActorType:
              return

          if not custom_after_sleep:
-             custom_after_sleep = self._configuration.metamorph_after_sleep
+             custom_after_sleep = self.configuration.metamorph_after_sleep

-         # If is_at_home() is True, config.actor_run_id is always set
-         if not self._configuration.actor_run_id:
+         # If is_at_home() is True, configuration.actor_run_id is always set
+         if not self.configuration.actor_run_id:
              raise RuntimeError('actor_run_id cannot be None when running on the Apify platform.')

-         await self._apify_client.run(self._configuration.actor_run_id).metamorph(
+         await self.apify_client.run(self.configuration.actor_run_id).metamorph(
              target_actor_id=target_actor_id,
              run_input=run_input,
              target_actor_build=target_actor_build,
@@ -1057,7 +1110,7 @@ class _ActorType:
          _ActorType._is_rebooting = True

          if not custom_after_sleep:
-             custom_after_sleep = self._configuration.metamorph_after_sleep
+             custom_after_sleep = self.configuration.metamorph_after_sleep

          # Call all the listeners for the PERSIST_STATE and MIGRATING events, and wait for them to finish.
          # PERSIST_STATE listeners are called to allow the Actor to persist its state before the reboot.
@@ -1066,10 +1119,10 @@ class _ActorType:
          # We can't just emit the events and wait for all listeners to finish,
          # because this method might be called from an event listener itself, and we would deadlock.
          persist_state_listeners = flatten(
-             (self._event_manager._listeners_to_wrappers[Event.PERSIST_STATE] or {}).values()  # noqa: SLF001
+             (self.event_manager._listeners_to_wrappers[Event.PERSIST_STATE] or {}).values()  # noqa: SLF001
          )
          migrating_listeners = flatten(
-             (self._event_manager._listeners_to_wrappers[Event.MIGRATING] or {}).values()  # noqa: SLF001
+             (self.event_manager._listeners_to_wrappers[Event.MIGRATING] or {}).values()  # noqa: SLF001
          )

          await asyncio.gather(
@@ -1077,10 +1130,10 @@ class _ActorType:
              *[listener(EventMigratingData()) for listener in migrating_listeners],
          )

-         if not self._configuration.actor_run_id:
+         if not self.configuration.actor_run_id:
              raise RuntimeError('actor_run_id cannot be None when running on the Apify platform.')

-         await self._apify_client.run(self._configuration.actor_run_id).reboot()
+         await self.apify_client.run(self.configuration.actor_run_id).reboot()

          if custom_after_sleep:
              await asyncio.sleep(custom_after_sleep.total_seconds())
@@ -1119,11 +1172,11 @@ class _ActorType:
              return

          # If is_at_home() is True, config.actor_run_id is always set
-         if not self._configuration.actor_run_id:
+         if not self.configuration.actor_run_id:
              raise RuntimeError('actor_run_id cannot be None when running on the Apify platform.')

-         await self._apify_client.webhooks().create(
-             actor_run_id=self._configuration.actor_run_id,
+         await self.apify_client.webhooks().create(
+             actor_run_id=self.configuration.actor_run_id,
              event_types=webhook.event_types,
              request_url=webhook.request_url,
              payload_template=webhook.payload_template,
@@ -1155,10 +1208,10 @@ class _ActorType:
              return None

          # If is_at_home() is True, config.actor_run_id is always set
-         if not self._configuration.actor_run_id:
+         if not self.configuration.actor_run_id:
              raise RuntimeError('actor_run_id cannot be None when running on the Apify platform.')

-         api_result = await self._apify_client.run(self._configuration.actor_run_id).update(
+         api_result = await self.apify_client.run(self.configuration.actor_run_id).update(
              status_message=status_message, is_status_message_terminal=is_terminal
          )

@@ -1168,7 +1221,7 @@ class _ActorType:
          self,
          *,
          actor_proxy_input: dict
-         | None = None,  # this is the raw proxy input from the actor run input, it is not spread or snake_cased in here
+         | None = None,  # this is the raw proxy input from the Actor run input, it is not spread or snake_cased in here
          password: str | None = None,
          groups: list[str] | None = None,
          country_code: str | None = None,
@@ -1213,7 +1266,7 @@ class _ActorType:
              country_code=country_code,
              proxy_urls=proxy_urls,
              new_url_function=new_url_function,
-             _actor_config=self._configuration,
+             _actor_config=self.configuration,
              _apify_client=self._apify_client,
          )

apify/_configuration.py CHANGED
@@ -8,6 +8,7 @@ from typing import Annotated, Any
  from pydantic import AliasChoices, BeforeValidator, Field, model_validator
  from typing_extensions import Self, deprecated

+ from crawlee import service_locator
  from crawlee._utils.models import timedelta_ms
  from crawlee._utils.urls import validate_http_url
  from crawlee.configuration import Configuration as CrawleeConfiguration
@@ -424,11 +425,41 @@ class Configuration(CrawleeConfiguration):
      def get_global_configuration(cls) -> Configuration:
          """Retrieve the global instance of the configuration.

-         Mostly for the backwards compatibility. It is recommended to use the `service_locator.get_configuration()`
-         instead.
+         This method ensures that an Apify Configuration is returned, even if a Crawlee Configuration was set in
+         the service locator.
          """
-         return cls()
+         global_configuration = service_locator.get_configuration()

+         if isinstance(global_configuration, Configuration):
+             # If an Apify configuration was already stored in the service locator, return it.
+             return global_configuration

- # Monkey-patch the base class so that it works with the extended configuration
- CrawleeConfiguration.get_global_configuration = Configuration.get_global_configuration  # type: ignore[method-assign]
+         logger.warning(
+             'A non-Apify Configuration is set in the `service_locator` in the SDK context. '
+             'It is recommended to set `apify.Configuration` explicitly as early as possible by using '
+             'service_locator.set_configuration.'
+         )
+
+         return cls.from_configuration(global_configuration)
+
+     @classmethod
+     def from_configuration(cls, configuration: CrawleeConfiguration) -> Configuration:
+         """Create an Apify Configuration from an existing Crawlee Configuration.
+
+         Args:
+             configuration: The existing Crawlee Configuration.
+
+         Returns:
+             The created Apify Configuration.
+         """
+         apify_configuration = cls()
+
+         # Ensure the returned configuration is of type Apify Configuration.
+         # Most likely a Crawlee configuration was already set; create an Apify configuration from it.
+         # Due to the known Pydantic issue https://github.com/pydantic/pydantic/issues/9516, creating a new instance
+         # of Configuration from an existing one, in a situation where the environment can have some fields set by
+         # alias, is very unpredictable. Use the stable workaround instead.
+         for name in configuration.model_fields:
+             setattr(apify_configuration, name, getattr(configuration, name))
+
+         return apify_configuration
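To illustrate the field-copy workaround, a small sketch (assuming these pydantic-settings models accept field-name keyword arguments, which `populate_by_name` enables):

```python
from datetime import timedelta

from crawlee.configuration import Configuration as CrawleeConfiguration

from apify import Configuration

# A plain Crawlee configuration, e.g. set up by a crawler before the Actor.
crawlee_config = CrawleeConfiguration(persist_state_interval=timedelta(seconds=30))

# Field-by-field copy sidesteps the pydantic re-validation issue linked above.
apify_config = Configuration.from_configuration(crawlee_config)
assert apify_config.persist_state_interval == timedelta(seconds=30)
```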
apify/storage_clients/_apify/_dataset_client.py CHANGED
@@ -11,8 +11,9 @@ from crawlee._utils.byte_size import ByteSize
  from crawlee._utils.file import json_dumps
  from crawlee.storage_clients._base import DatasetClient
  from crawlee.storage_clients.models import DatasetItemsListPage, DatasetMetadata
+ from crawlee.storages import Dataset

- from ._utils import resolve_alias_to_id, store_alias_mapping
+ from ._utils import AliasResolver

  if TYPE_CHECKING:
      from collections.abc import AsyncIterator
@@ -126,19 +127,19 @@ class ApifyDatasetClient(DatasetClient):
          # Normalize 'default' alias to None
          alias = None if alias == 'default' else alias

-         # Handle alias resolution
          if alias:
-             # Try to resolve alias to existing storage ID
-             resolved_id = await resolve_alias_to_id(alias, 'dataset', configuration)
-             if resolved_id:
-                 id = resolved_id
-             else:
-                 # Create a new storage and store the alias mapping
-                 new_storage_metadata = DatasetMetadata.model_validate(
-                     await apify_datasets_client.get_or_create(),
-                 )
-                 id = new_storage_metadata.id
-                 await store_alias_mapping(alias, 'dataset', id, configuration)
+             # Check if there is a pre-existing alias mapping in the default KVS.
+             async with AliasResolver(storage_type=Dataset, alias=alias, configuration=configuration) as _alias:
+                 id = await _alias.resolve_id()
+
+                 # There was no pre-existing alias in the mapping.
+                 # Create a new unnamed storage and store the mapping.
+                 if id is None:
+                     new_storage_metadata = DatasetMetadata.model_validate(
+                         await apify_datasets_client.get_or_create(),
+                     )
+                     id = new_storage_metadata.id
+                     await _alias.store_mapping(storage_id=id)

          # If name is provided, get or create the storage by name.
          elif name:
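From the caller's perspective nothing changes: aliased storages are still opened the same way, but the id bookkeeping now goes through `AliasResolver` and survives migrations on the platform. A sketch:

```python
from apify import Actor


async def main() -> None:
    async with Actor:
        # First open creates an unnamed dataset and records alias -> id in the
        # run's default KVS (under __STORAGE_ALIASES_MAPPING); subsequent opens,
        # even after a migration, resolve to the same dataset.
        dataset = await Actor.open_dataset(alias='session-results')
        await dataset.push_data({'url': 'https://example.com'})
```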
apify/storage_clients/_apify/_key_value_store_client.py CHANGED
@@ -10,9 +10,10 @@ from yarl import URL
  from apify_client import ApifyClientAsync
  from crawlee.storage_clients._base import KeyValueStoreClient
  from crawlee.storage_clients.models import KeyValueStoreRecord, KeyValueStoreRecordMetadata
+ from crawlee.storages import KeyValueStore

  from ._models import ApifyKeyValueStoreMetadata, KeyValueStoreListKeysPage
- from ._utils import resolve_alias_to_id, store_alias_mapping
+ from ._utils import AliasResolver
  from apify._crypto import create_hmac_signature

  if TYPE_CHECKING:
@@ -117,19 +118,20 @@ class ApifyKeyValueStoreClient(KeyValueStoreClient):
          # Normalize 'default' alias to None
          alias = None if alias == 'default' else alias

-         # Handle alias resolution
          if alias:
-             # Try to resolve alias to existing storage ID
-             resolved_id = await resolve_alias_to_id(alias, 'kvs', configuration)
-             if resolved_id:
-                 id = resolved_id
-             else:
-                 # Create a new storage and store the alias mapping
-                 new_storage_metadata = ApifyKeyValueStoreMetadata.model_validate(
-                     await apify_kvss_client.get_or_create(),
-                 )
-                 id = new_storage_metadata.id
-                 await store_alias_mapping(alias, 'kvs', id, configuration)
+             # Check if there is a pre-existing alias mapping in the default KVS.
+             async with AliasResolver(storage_type=KeyValueStore, alias=alias, configuration=configuration) as _alias:
+                 id = await _alias.resolve_id()
+
+                 # There was no pre-existing alias in the mapping.
+                 # Create a new unnamed storage and store the mapping.
+                 if id is None:
+                     # Create a new storage and store the alias mapping.
+                     new_storage_metadata = ApifyKeyValueStoreMetadata.model_validate(
+                         await apify_kvss_client.get_or_create(),
+                     )
+                     id = new_storage_metadata.id
+                     await _alias.store_mapping(storage_id=id)

          # If name is provided, get or create the storage by name.
          elif name:
apify/storage_clients/_apify/_models.py CHANGED
@@ -5,7 +5,7 @@ from typing import Annotated

  from pydantic import BaseModel, ConfigDict, Field

- from crawlee.storage_clients.models import KeyValueStoreMetadata
+ from crawlee.storage_clients.models import KeyValueStoreMetadata, RequestQueueMetadata

  from apify import Request
  from apify._utils import docs_group
@@ -105,3 +105,27 @@ class CachedRequest(BaseModel):

      lock_expires_at: datetime | None = None
      """The expiration time of the lock on the request."""
+
+
+ class RequestQueueStats(BaseModel):
+     model_config = ConfigDict(populate_by_name=True)
+
+     delete_count: Annotated[int, Field(alias='deleteCount', default=0)]
+     """The number of request queue deletes."""
+
+     head_item_read_count: Annotated[int, Field(alias='headItemReadCount', default=0)]
+     """The number of request queue head reads."""
+
+     read_count: Annotated[int, Field(alias='readCount', default=0)]
+     """The number of request queue reads."""
+
+     storage_bytes: Annotated[int, Field(alias='storageBytes', default=0)]
+     """Storage size in bytes."""
+
+     write_count: Annotated[int, Field(alias='writeCount', default=0)]
+     """The number of request queue writes."""
+
+
+ class ApifyRequestQueueMetadata(RequestQueueMetadata):
+     stats: Annotated[RequestQueueStats, Field(alias='stats', default_factory=RequestQueueStats)]
+     """Additional statistics about the request queue."""
apify/storage_clients/_apify/_request_queue_client.py CHANGED
@@ -16,9 +16,16 @@ from apify_client import ApifyClientAsync
  from crawlee._utils.crypto import crypto_random_object_id
  from crawlee.storage_clients._base import RequestQueueClient
  from crawlee.storage_clients.models import AddRequestsResponse, ProcessedRequest, RequestQueueMetadata
-
- from ._models import CachedRequest, ProlongRequestLockResponse, RequestQueueHead
- from ._utils import resolve_alias_to_id, store_alias_mapping
+ from crawlee.storages import RequestQueue
+
+ from ._models import (
+     ApifyRequestQueueMetadata,
+     CachedRequest,
+     ProlongRequestLockResponse,
+     RequestQueueHead,
+     RequestQueueStats,
+ )
+ from ._utils import AliasResolver
  from apify import Request

  if TYPE_CHECKING:
@@ -107,7 +114,7 @@ class ApifyRequestQueueClient(RequestQueueClient):
          return self._metadata

      @override
-     async def get_metadata(self) -> RequestQueueMetadata:
+     async def get_metadata(self) -> ApifyRequestQueueMetadata:
          """Get metadata about the request queue.

          Returns:
@@ -118,7 +125,7 @@ class ApifyRequestQueueClient(RequestQueueClient):
          if response is None:
              raise ValueError('Failed to fetch request queue metadata from the API.')
          # Enhance API response by local estimations (API can be delayed few seconds, while local estimation not.)
-         return RequestQueueMetadata(
+         return ApifyRequestQueueMetadata(
              id=response['id'],
              name=response['name'],
              total_request_count=max(response['totalRequestCount'], self._metadata.total_request_count),
@@ -128,6 +135,7 @@ class ApifyRequestQueueClient(RequestQueueClient):
              modified_at=max(response['modifiedAt'], self._metadata.modified_at),
              accessed_at=max(response['accessedAt'], self._metadata.accessed_at),
              had_multiple_clients=response['hadMultipleClients'] or self._metadata.had_multiple_clients,
+             stats=RequestQueueStats.model_validate(response['stats'], by_alias=True),
          )

      @classmethod
@@ -195,19 +203,19 @@ class ApifyRequestQueueClient(RequestQueueClient):
          # Normalize 'default' alias to None
          alias = None if alias == 'default' else alias

-         # Handle alias resolution
          if alias:
-             # Try to resolve alias to existing storage ID
-             resolved_id = await resolve_alias_to_id(alias, 'rq', configuration)
-             if resolved_id:
-                 id = resolved_id
-             else:
-                 # Create a new storage and store the alias mapping
-                 new_storage_metadata = RequestQueueMetadata.model_validate(
-                     await apify_rqs_client.get_or_create(),
-                 )
-                 id = new_storage_metadata.id
-                 await store_alias_mapping(alias, 'rq', id, configuration)
+             # Check if there is a pre-existing alias mapping in the default KVS.
+             async with AliasResolver(storage_type=RequestQueue, alias=alias, configuration=configuration) as _alias:
+                 id = await _alias.resolve_id()
+
+                 # There was no pre-existing alias in the mapping.
+                 # Create a new unnamed storage and store the mapping.
+                 if id is None:
+                     new_storage_metadata = RequestQueueMetadata.model_validate(
+                         await apify_rqs_client.get_or_create(),
+                     )
+                     id = new_storage_metadata.id
+                     await _alias.store_mapping(storage_id=id)

          # If name is provided, get or create the storage by name.
          elif name:
apify/storage_clients/_apify/_storage_client.py CHANGED
@@ -9,16 +9,35 @@ from crawlee.storage_clients._base import StorageClient
  from ._dataset_client import ApifyDatasetClient
  from ._key_value_store_client import ApifyKeyValueStoreClient
  from ._request_queue_client import ApifyRequestQueueClient
+ from ._utils import hash_api_base_url_and_token
+ from apify._configuration import Configuration as ApifyConfiguration
  from apify._utils import docs_group

  if TYPE_CHECKING:
-     from crawlee.configuration import Configuration
+     from collections.abc import Hashable
+
+     from crawlee.configuration import Configuration as CrawleeConfiguration


  @docs_group('Storage clients')
  class ApifyStorageClient(StorageClient):
      """Apify storage client."""

+     # This class breaches the Liskov Substitution Principle: it requires a specialized Configuration compared to its parent.
+     _lsp_violation_error_message_template = (
+         'Expected "configuration" to be an instance of "apify.Configuration", but got {} instead.'
+     )
+
+     @override
+     def get_additional_cache_key(self, configuration: CrawleeConfiguration) -> Hashable:
+         if isinstance(configuration, ApifyConfiguration):
+             return hash_api_base_url_and_token(configuration)
+
+         config_class = type(configuration)
+         raise TypeError(
+             self._lsp_violation_error_message_template.format(f'{config_class.__module__}.{config_class.__name__}')
+         )
+
      @override
      async def create_dataset_client(
          self,
@@ -26,19 +45,13 @@ class ApifyStorageClient(StorageClient):
          id: str | None = None,
          name: str | None = None,
          alias: str | None = None,
-         configuration: Configuration | None = None,
+         configuration: CrawleeConfiguration | None = None,
      ) -> ApifyDatasetClient:
-         # Import here to avoid circular imports.
-         from apify import Configuration as ApifyConfiguration  # noqa: PLC0415
-
          configuration = configuration or ApifyConfiguration.get_global_configuration()
          if isinstance(configuration, ApifyConfiguration):
              return await ApifyDatasetClient.open(id=id, name=name, alias=alias, configuration=configuration)

-         raise TypeError(
-             f'Expected "configuration" to be an instance of "apify.Configuration", '
-             f'but got {type(configuration).__name__} instead.'
-         )
+         raise TypeError(self._lsp_violation_error_message_template.format(type(configuration).__name__))

      @override
      async def create_kvs_client(
@@ -47,19 +60,13 @@ class ApifyStorageClient(StorageClient):
          id: str | None = None,
          name: str | None = None,
          alias: str | None = None,
-         configuration: Configuration | None = None,
+         configuration: CrawleeConfiguration | None = None,
      ) -> ApifyKeyValueStoreClient:
-         # Import here to avoid circular imports.
-         from apify import Configuration as ApifyConfiguration  # noqa: PLC0415
-
          configuration = configuration or ApifyConfiguration.get_global_configuration()
          if isinstance(configuration, ApifyConfiguration):
              return await ApifyKeyValueStoreClient.open(id=id, name=name, alias=alias, configuration=configuration)

-         raise TypeError(
-             f'Expected "configuration" to be an instance of "apify.Configuration", '
-             f'but got {type(configuration).__name__} instead.'
-         )
+         raise TypeError(self._lsp_violation_error_message_template.format(type(configuration).__name__))

      @override
      async def create_rq_client(
@@ -68,16 +75,10 @@ class ApifyStorageClient(StorageClient):
          id: str | None = None,
          name: str | None = None,
          alias: str | None = None,
-         configuration: Configuration | None = None,
+         configuration: CrawleeConfiguration | None = None,
      ) -> ApifyRequestQueueClient:
-         # Import here to avoid circular imports.
-         from apify import Configuration as ApifyConfiguration  # noqa: PLC0415
-
          configuration = configuration or ApifyConfiguration.get_global_configuration()
          if isinstance(configuration, ApifyConfiguration):
              return await ApifyRequestQueueClient.open(id=id, name=name, alias=alias, configuration=configuration)

-         raise TypeError(
-             f'Expected "configuration" to be an instance of "apify.Configuration", '
-             f'but got {type(configuration).__name__} instead.'
-         )
+         raise TypeError(self._lsp_violation_error_message_template.format(type(configuration).__name__))
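`get_additional_cache_key` lets crawlee's storage-instance cache distinguish otherwise identical open calls made against different Apify accounts: the key is a short hash of the public API base URL plus token. A sketch (the token values are placeholders, and both configurations are assumed to carry the default `api_public_base_url`):

```python
from apify import Configuration
from apify.storage_clients import ApifyStorageClient

client = ApifyStorageClient()

config_a = Configuration(token='token-A')  # placeholder tokens
config_b = Configuration(token='token-B')

# Different credentials hash to different cache keys, so storages opened under
# one account are never served from the other's cache entry.
assert client.get_additional_cache_key(config_a) != client.get_additional_cache_key(config_b)
```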
apify/storage_clients/_apify/_utils.py CHANGED
@@ -1,117 +1,167 @@
  from __future__ import annotations

+ import logging
+ from asyncio import Lock
  from logging import getLogger
- from typing import TYPE_CHECKING, Literal
+ from typing import TYPE_CHECKING, ClassVar

  from apify_client import ApifyClientAsync
+ from crawlee._utils.crypto import compute_short_hash
+
+ from apify._configuration import Configuration

  if TYPE_CHECKING:
+     from types import TracebackType
+
      from apify_client.clients import KeyValueStoreClientAsync
+     from crawlee.storages import Dataset, KeyValueStore, RequestQueue

-     from apify import Configuration

  logger = getLogger(__name__)

- _ALIAS_MAPPING_KEY = '__STORAGE_ALIASES_MAPPING'
-
-
- async def resolve_alias_to_id(
-     alias: str,
-     storage_type: Literal['dataset', 'kvs', 'rq'],
-     configuration: Configuration,
- ) -> str | None:
-     """Resolve a storage alias to its corresponding storage ID.
-
-     Args:
-         alias: The alias to resolve.
-         storage_type: Type of storage ('dataset', 'key_value_store', or 'request_queue').
-         configuration: The configuration object containing API credentials.
-
-     Returns:
-         The storage ID if found, None if the alias doesn't exist.
-     """
-     default_kvs_client = await _get_default_kvs_client(configuration)
-
-     # Create the dictionary key for this alias.
-     alias_key = f'alias-{storage_type}-{alias}'
-
-     try:
-         record = await default_kvs_client.get_record(_ALIAS_MAPPING_KEY)
-
-         # get_record can return {key: ..., value: ..., content_type: ...}
-         if isinstance(record, dict) and 'value' in record:
-             record = record['value']
-
-         # Extract the actual data from the KVS record
-         if isinstance(record, dict) and alias_key in record:
-             storage_id = record[alias_key]
-             return str(storage_id)

-     except Exception as exc:
-         # If there's any error accessing the record, treat it as not found.
-         logger.warning(f'Error accessing alias mapping for {alias}: {exc}')
+ class AliasResolver:
+     """Class for handling aliases.

-     return None
-
-
- async def store_alias_mapping(
-     alias: str,
-     storage_type: Literal['dataset', 'kvs', 'rq'],
-     storage_id: str,
-     configuration: Configuration,
- ) -> None:
-     """Store a mapping from alias to storage ID in the default key-value store.
-
-     Args:
-         alias: The alias to store.
-         storage_type: Type of storage ('dataset', 'key_value_store', or 'request_queue').
-         storage_id: The storage ID to map the alias to.
-         configuration: The configuration object containing API credentials.
+     The purpose of this class is to ensure that aliased storages are created with the correct id. This is achieved by
+     using the default KVS as a storage for the global mapping of aliases to storage ids. The same mapping is also kept
+     in memory to avoid unnecessary calls to the API and to offer limited support for alias storages when not running
+     on the Apify platform. When on the Apify platform, storages created with an alias are accessible by the same
+     alias even after migration or reboot.
      """
-     default_kvs_client = await _get_default_kvs_client(configuration)
-
-     # Create the dictionary key for this alias.
-     alias_key = f'alias-{storage_type}-{alias}'
-
-     try:
-         record = await default_kvs_client.get_record(_ALIAS_MAPPING_KEY)
-
-         # get_record can return {key: ..., value: ..., content_type: ...}
-         if isinstance(record, dict) and 'value' in record:
-             record = record['value']

-         # Update or create the record with the new alias mapping
-         if isinstance(record, dict):
-             record[alias_key] = storage_id
-         else:
-             record = {alias_key: storage_id}
-
-         # Store the mapping back in the KVS.
-         await default_kvs_client.set_record(_ALIAS_MAPPING_KEY, record)
-     except Exception as exc:
-         logger.warning(f'Error accessing alias mapping for {alias}: {exc}')
-
-
- async def _get_default_kvs_client(configuration: Configuration) -> KeyValueStoreClientAsync:
+     _alias_map: ClassVar[dict[str, str]] = {}
+     """Map containing pre-existing alias storages and their ids. Global for all instances."""
+     _alias_init_lock: Lock | None = None
+     """Lock for creating alias storages. Only one alias storage can be created at a time. Global for all instances."""
+
+     _ALIAS_STORAGE_KEY_SEPARATOR = ','
+     _ALIAS_MAPPING_KEY = '__STORAGE_ALIASES_MAPPING'
+
+     def __init__(
+         self, storage_type: type[Dataset | KeyValueStore | RequestQueue], alias: str, configuration: Configuration
+     ) -> None:
+         self._storage_type = storage_type
+         self._alias = alias
+         self._additional_cache_key = hash_api_base_url_and_token(configuration)
+
+     async def __aenter__(self) -> AliasResolver:
+         """Context manager to prevent a race condition in alias creation."""
+         lock = await self._get_alias_init_lock()
+         await lock.acquire()
+         return self
+
+     async def __aexit__(
+         self, exc_type: type[BaseException] | None, exc_value: BaseException | None, exc_traceback: TracebackType | None
+     ) -> None:
+         lock = await self._get_alias_init_lock()
+         lock.release()
+
+     @classmethod
+     async def _get_alias_init_lock(cls) -> Lock:
+         """Get the lock controlling the creation of alias storages.
+
+         The lock is shared by all instances of the AliasResolver class.
+         It is created in an async method to ensure that an event loop is already running.
+         """
+         if cls._alias_init_lock is None:
+             cls._alias_init_lock = Lock()
+         return cls._alias_init_lock
+
+     @classmethod
+     async def _get_alias_map(cls) -> dict[str, str]:
+         """Get the alias-to-storage-id mapping from the default KVS.
+
+         The mapping is loaded from the KVS only once and is shared by all instances of the AliasResolver class.
+
+         Returns:
+             Map of aliases and storage ids.
+         """
+         if not cls._alias_map:
+             default_kvs_client = await _get_default_kvs_client()
+
+             record = await default_kvs_client.get_record(cls._ALIAS_MAPPING_KEY)
+
+             # get_record can return {key: ..., value: ..., content_type: ...}
+             if isinstance(record, dict):
+                 if 'value' in record and isinstance(record['value'], dict):
+                     cls._alias_map = record['value']
+                 else:
+                     cls._alias_map = record
+             else:
+                 cls._alias_map = dict[str, str]()
+
+         return cls._alias_map
+
+     async def resolve_id(self) -> str | None:
+         """Get the id of the aliased storage by looking it up in the in-memory mapping.
+
+         Returns:
+             Storage id if it exists, None otherwise.
+         """
+         return (await self._get_alias_map()).get(self._storage_key, None)
+
+     async def store_mapping(self, storage_id: str) -> None:
+         """Add the alias and related storage id to the mapping in the default KVS and the local in-memory mapping."""
+         # Update in-memory mapping
+         (await self._get_alias_map())[self._storage_key] = storage_id
+         if not Configuration.get_global_configuration().is_at_home:
+             logging.getLogger(__name__).warning(
+                 'AliasResolver storage retention is only supported on the Apify platform. Storage is not exported.'
+             )
+             return
+
+         default_kvs_client = await _get_default_kvs_client()
+         await default_kvs_client.get()
+
+         try:
+             record = await default_kvs_client.get_record(self._ALIAS_MAPPING_KEY)
+
+             # get_record can return {key: ..., value: ..., content_type: ...}
+             if isinstance(record, dict) and 'value' in record:
+                 record = record['value']
+
+             # Update or create the record with the new alias mapping
+             if isinstance(record, dict):
+                 record[self._storage_key] = storage_id
+             else:
+                 record = {self._storage_key: storage_id}
+
+             # Store the mapping back in the KVS.
+             await default_kvs_client.set_record(self._ALIAS_MAPPING_KEY, record)
+         except Exception as exc:
+             logger.warning(f'Error storing alias mapping for {self._alias}: {exc}')
+
+     @property
+     def _storage_key(self) -> str:
+         """Get a unique storage key used for storing the alias in the mapping."""
+         return self._ALIAS_STORAGE_KEY_SEPARATOR.join(
+             [
+                 self._storage_type.__name__,
+                 self._alias,
+                 self._additional_cache_key,
+             ]
+         )
+
+
+ async def _get_default_kvs_client() -> KeyValueStoreClientAsync:
      """Get a client for the default key-value store."""
-     token = configuration.token
-     if not token:
-         raise ValueError(f'Apify storage client requires a valid token in Configuration (token={token}).')
+     configuration = Configuration.get_global_configuration()

-     api_url = configuration.api_base_url
-     if not api_url:
-         raise ValueError(f'Apify storage client requires a valid API URL in Configuration (api_url={api_url}).')
-
-     # Create Apify client with the provided token and API URL
      apify_client_async = ApifyClientAsync(
-         token=token,
-         api_url=api_url,
+         token=configuration.token,
+         api_url=configuration.api_base_url,
          max_retries=8,
          min_delay_between_retries_millis=500,
          timeout_secs=360,
      )

-     # Get the default key-value store ID from configuration
-     default_kvs_id = configuration.default_key_value_store_id
+     return apify_client_async.key_value_store(key_value_store_id=configuration.default_key_value_store_id)
+

-     return apify_client_async.key_value_store(key_value_store_id=default_kvs_id)
+ def hash_api_base_url_and_token(configuration: Configuration) -> str:
+     """Hash configuration.api_public_base_url and configuration.token in a deterministic way."""
+     if configuration.api_public_base_url is None or configuration.token is None:
+         raise ValueError("'Configuration.api_public_base_url' and 'Configuration.token' must be set.")
+     return compute_short_hash(f'{configuration.api_public_base_url}{configuration.token}'.encode())
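The resolver is used as an async context manager so the class-wide lock serializes alias creation. A condensed sketch of the resolve-or-create flow the three storage clients above share (`create_dataset` is a hypothetical stand-in for the client's `get_or_create` call):

```python
from collections.abc import Awaitable, Callable

from crawlee.storages import Dataset

from apify import Configuration
from apify.storage_clients._apify._utils import AliasResolver


async def dataset_id_for_alias(alias: str, create_dataset: Callable[[], Awaitable[str]]) -> str:
    """Resolve `alias` to a storage id, creating the storage once if needed."""
    configuration = Configuration.get_global_configuration()

    # The lock held between __aenter__ and __aexit__ ensures that two concurrent
    # opens of the same alias cannot both create a storage.
    async with AliasResolver(storage_type=Dataset, alias=alias, configuration=configuration) as resolver:
        storage_id = await resolver.resolve_id()
        if storage_id is None:
            storage_id = await create_dataset()  # hypothetical callback returning the new storage id
            await resolver.store_mapping(storage_id=storage_id)
    return storage_id
```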
apify-2.7.1b19.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: apify
- Version: 2.7.1b17
+ Version: 2.7.1b19
  Summary: Apify SDK for Python
  Project-URL: Apify Homepage, https://apify.com
  Project-URL: Changelog, https://docs.apify.com/sdk/python/docs/changelog
@@ -228,7 +228,7 @@ Requires-Python: >=3.10
  Requires-Dist: apify-client<3.0.0,>=2.0.0
  Requires-Dist: apify-shared<3.0.0,>=2.0.0
  Requires-Dist: cachetools>=5.5.0
- Requires-Dist: crawlee==0.6.13b37
+ Requires-Dist: crawlee==0.6.13b42
  Requires-Dist: cryptography>=42.0.0
  Requires-Dist: impit>=0.6.1
  Requires-Dist: lazy-object-proxy>=1.11.0
apify-2.7.1b19.dist-info/RECORD CHANGED
@@ -1,7 +1,7 @@
  apify/__init__.py,sha256=HpgKg2FZWJuSPfDygzJ62psylhw4NN4tKFnoYUIhcd4,838
- apify/_actor.py,sha256=i0pWWcysaLCZ64fJH9dznbpWF59BxG67QQjm3w0OO3s,53322
+ apify/_actor.py,sha256=2Y_e7t2zlkZbuW7rQME7WdC4kjsoZBoCs4KCuowjbQE,56108
  apify/_charging.py,sha256=KjZ2DnEMS0Tt8ibizmmt0RwBq8FOAsD1z-hKFgdazcY,13143
- apify/_configuration.py,sha256=Ta-qPlKenLaI0IOlODg_A4ZwmPtnuS7OWLSJyexcqmA,13283
+ apify/_configuration.py,sha256=DOKRjDGE2qU7LVPa5VZJzLShKGlkr7_207UOzfCAk_U,14676
  apify/_consts.py,sha256=CjhyEJ4Mi0lcIrzfqz8dN7nPJWGjCeBrrXQy1PZ6zRI,440
  apify/_crypto.py,sha256=tqUs13QkemDtGzvU41pIA2HUEawpDlgzqbwKjm4I8kM,6852
  apify/_models.py,sha256=EzU-inWeJ7T5HNVYEwnYb79W-q4OAPhtrYctfRYzpTE,7848
@@ -35,19 +35,19 @@ apify/scrapy/pipelines/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSu
  apify/storage_clients/__init__.py,sha256=9WLAKs2GnnP0yyKR0mc3AfJ1IqXF48V3KPMp6KaB8kU,277
  apify/storage_clients/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  apify/storage_clients/_apify/__init__.py,sha256=mtbVDsxqWL3kx30elnh0kAn2kZ4s3BBsWa15Y5e7RMU,347
- apify/storage_clients/_apify/_dataset_client.py,sha256=tyx5QKbhyF9s2Y2gByVS2Pm3oE-MTvli6b8bkwQzMtk,12004
- apify/storage_clients/_apify/_key_value_store_client.py,sha256=gFt7AFIE6oAJ-g2rx7CG3r7ZWD0URUXqaw4Tk5zES0M,9916
- apify/storage_clients/_apify/_models.py,sha256=C6FpXswtO6kXE5RUumazm_conzJJS6PrXAGF9XBuDb8,3651
- apify/storage_clients/_apify/_request_queue_client.py,sha256=Ti48ncuVQkrf6RyoROgYy6MzZ2Tsmi_lurm-2UpWWSs,32528
- apify/storage_clients/_apify/_storage_client.py,sha256=iU67040i7Tmb-qoVZGwcgTF_qZUxsGQpAeV_oIBrpNU,3031
- apify/storage_clients/_apify/_utils.py,sha256=5fy0WErl9HFy78qUPw-fS4jgxMFg-NJvJkuP58WGHuY,3992
+ apify/storage_clients/_apify/_dataset_client.py,sha256=aKQsoIDLbuP6R8wo2DX6pzp-EHV5EWZlHWB8s8OLuG4,12096
+ apify/storage_clients/_apify/_key_value_store_client.py,sha256=EWcWE6HiXBGklCAilj4My3T95py7Nu6Zt3e_XzpySUM,10099
+ apify/storage_clients/_apify/_models.py,sha256=GEaN7Got1zIg42QPH36obHRWRDVNtzOkRuOWYRf9bFU,4572
+ apify/storage_clients/_apify/_request_queue_client.py,sha256=fkjdA0Kaq8CquiTfWOKGXxXqbTB1nNWDQ9wL98g0nTg,32807
+ apify/storage_clients/_apify/_storage_client.py,sha256=7oqn8-7zG7_cruw6jzmRl2htX2rOt-KPTzCRVNCcyTA,3304
+ apify/storage_clients/_apify/_utils.py,sha256=1g0oGqHmB-AeNfeMiB33om2bRZ_7wwhpijPXdWRPNsM,6609
  apify/storage_clients/_apify/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  apify/storage_clients/_file_system/__init__.py,sha256=rDbXatXV9wHKPhKTrXDzWnexhTm7sIJQWucMi-P-SD4,130
  apify/storage_clients/_file_system/_key_value_store_client.py,sha256=fnSJ1EIOPCGfcE6e5S3Tux9VbnMVLCJjugkaQoH_9yo,2267
  apify/storage_clients/_file_system/_storage_client.py,sha256=rcwpKYlrWzvlSA2xoxftg-EZAi_iGZ3vOCbu0C5lKDE,1396
  apify/storages/__init__.py,sha256=-9tEYJVabVs_eRVhUehxN58GH0UG8OfuGjGwuDieP2M,122
  apify/storages/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- apify-2.7.1b17.dist-info/METADATA,sha256=7yn182sNjFOwzP5f98YL5Ap18RYwBVDESLflDJdU8OE,22580
- apify-2.7.1b17.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
- apify-2.7.1b17.dist-info/licenses/LICENSE,sha256=AsFjHssKjj4LGd2ZCqXn6FBzMqcWdjQre1byPPSypVw,11355
- apify-2.7.1b17.dist-info/RECORD,,
+ apify-2.7.1b19.dist-info/METADATA,sha256=XVHTFOiCHcLju1Im2srBp_IoV7ZZ6tk5snx8HrWpFoM,22580
+ apify-2.7.1b19.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ apify-2.7.1b19.dist-info/licenses/LICENSE,sha256=AsFjHssKjj4LGd2ZCqXn6FBzMqcWdjQre1byPPSypVw,11355
+ apify-2.7.1b19.dist-info/RECORD,,