apify 2.7.1b14__py3-none-any.whl → 2.7.1b16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release. This version of apify might be problematic.

apify/_actor.py CHANGED
@@ -401,6 +401,7 @@ class _ActorType:
        self,
        *,
        id: str | None = None,
+        alias: str | None = None,
        name: str | None = None,
        force_cloud: bool = False,
    ) -> Dataset:
@@ -411,10 +412,12 @@ class _ActorType:
        the Apify cloud.

        Args:
-            id: ID of the dataset to be opened. If neither `id` nor `name` are provided, the method returns
-                the default dataset associated with the Actor run.
-            name: Name of the dataset to be opened. If neither `id` nor `name` are provided, the method returns
-                the default dataset associated with the Actor run.
+            id: The ID of the dataset to open. If provided, searches for existing dataset by ID.
+                Mutually exclusive with name and alias.
+            name: The name of the dataset to open (global scope, persists across runs).
+                Mutually exclusive with id and alias.
+            alias: The alias of the dataset to open (run scope, creates unnamed storage).
+                Mutually exclusive with id and name.
            force_cloud: If set to `True` then the Apify cloud storage is always used. This way it is possible
                to combine local and cloud storage.

@@ -428,6 +431,7 @@ class _ActorType:

        return await Dataset.open(
            id=id,
+            alias=alias,
            name=name,
            configuration=self._configuration,
            storage_client=storage_client,
@@ -437,6 +441,7 @@ class _ActorType:
        self,
        *,
        id: str | None = None,
+        alias: str | None = None,
        name: str | None = None,
        force_cloud: bool = False,
    ) -> KeyValueStore:
@@ -446,10 +451,12 @@ class _ActorType:
        and retrieved using a unique key. The actual data is stored either on a local filesystem or in the Apify cloud.

        Args:
-            id: ID of the key-value store to be opened. If neither `id` nor `name` are provided, the method returns
-                the default key-value store associated with the Actor run.
-            name: Name of the key-value store to be opened. If neither `id` nor `name` are provided, the method
-                returns the default key-value store associated with the Actor run.
+            id: The ID of the KVS to open. If provided, searches for existing KVS by ID.
+                Mutually exclusive with name and alias.
+            name: The name of the KVS to open (global scope, persists across runs).
+                Mutually exclusive with id and alias.
+            alias: The alias of the KVS to open (run scope, creates unnamed storage).
+                Mutually exclusive with id and name.
            force_cloud: If set to `True` then the Apify cloud storage is always used. This way it is possible
                to combine local and cloud storage.

@@ -462,6 +469,7 @@ class _ActorType:

        return await KeyValueStore.open(
            id=id,
+            alias=alias,
            name=name,
            configuration=self._configuration,
            storage_client=storage_client,
@@ -471,6 +479,7 @@ class _ActorType:
        self,
        *,
        id: str | None = None,
+        alias: str | None = None,
        name: str | None = None,
        force_cloud: bool = False,
    ) -> RequestQueue:
@@ -482,10 +491,12 @@ class _ActorType:
        crawling orders.

        Args:
-            id: ID of the request queue to be opened. If neither `id` nor `name` are provided, the method returns
-                the default request queue associated with the Actor run.
-            name: Name of the request queue to be opened. If neither `id` nor `name` are provided, the method returns
-                the default request queue associated with the Actor run.
+            id: The ID of the RQ to open. If provided, searches for existing RQ by ID.
+                Mutually exclusive with name and alias.
+            name: The name of the RQ to open (global scope, persists across runs).
+                Mutually exclusive with id and alias.
+            alias: The alias of the RQ to open (run scope, creates unnamed storage).
+                Mutually exclusive with id and name.
            force_cloud: If set to `True` then the Apify cloud storage is always used. This way it is possible
                to combine local and cloud storage.

@@ -499,6 +510,7 @@ class _ActorType:

        return await RequestQueue.open(
            id=id,
+            alias=alias,
            name=name,
            configuration=self._configuration,
            storage_client=storage_client,
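
The practical effect of the `_actor.py` changes above is that `Actor.open_dataset`, `Actor.open_key_value_store`, and `Actor.open_request_queue` now accept an `alias` keyword in addition to `id` and `name`. A minimal usage sketch (the alias values and the surrounding Actor code are illustrative, not part of the package):

    from apify import Actor

    async def main() -> None:
        async with Actor:
            # A run-scoped, unnamed dataset; repeated calls with the same alias
            # resolve to the same storage within the run.
            dataset = await Actor.open_dataset(alias='intermediate-results')
            await dataset.push_data({'url': 'https://example.com'})

            # Named storages keep their existing behavior: global scope,
            # persisted across runs.
            store = await Actor.open_key_value_store(name='my-global-store')

            # Per the updated docstrings, id, name, and alias are mutually exclusive.
            queue = await Actor.open_request_queue(alias='to-crawl')
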
apify/events/__init__.py CHANGED
@@ -1,5 +1,5 @@
-from crawlee.events import EventManager, LocalEventManager
+from crawlee.events import Event, EventManager, LocalEventManager

from ._apify_event_manager import ApifyEventManager

-__all__ = ['ApifyEventManager', 'EventManager', 'LocalEventManager']
+__all__ = ['ApifyEventManager', 'Event', 'EventManager', 'LocalEventManager']
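
`Event` is now re-exported from `apify.events`, so event names can be imported from the SDK package directly instead of from `crawlee.events`. A small sketch (the handler is illustrative):

    from apify import Actor
    from apify.events import Event

    async def on_persist_state(event_data: object) -> None:
        Actor.log.info(f'Persist state event received: {event_data}')

    async def main() -> None:
        async with Actor:
            # Register a listener for the platform's periodic persist-state event.
            Actor.on(Event.PERSIST_STATE, on_persist_state)
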
apify/storage_clients/_apify/_dataset_client.py CHANGED
@@ -12,6 +12,8 @@ from crawlee._utils.file import json_dumps
from crawlee.storage_clients._base import DatasetClient
from crawlee.storage_clients.models import DatasetItemsListPage, DatasetMetadata

+from ._utils import resolve_alias_to_id, store_alias_mapping
+
if TYPE_CHECKING:
    from collections.abc import AsyncIterator

@@ -66,6 +68,7 @@ class ApifyDatasetClient(DatasetClient):
        *,
        id: str | None,
        name: str | None,
+        alias: str | None,
        configuration: Configuration,
    ) -> ApifyDatasetClient:
        """Open an Apify dataset client.
@@ -74,22 +77,27 @@ class ApifyDatasetClient(DatasetClient):
        It handles authentication, storage lookup/creation, and metadata retrieval.

        Args:
-            id: The ID of an existing dataset to open. If provided, the client will connect to this specific storage.
-                Cannot be used together with `name`.
-            name: The name of a dataset to get or create. If a storage with this name exists, it will be opened;
-                otherwise, a new one will be created. Cannot be used together with `id`.
+            id: The ID of the dataset to open. If provided, searches for existing dataset by ID.
+                Mutually exclusive with name and alias.
+            name: The name of the dataset to open (global scope, persists across runs).
+                Mutually exclusive with id and alias.
+            alias: The alias of the dataset to open (run scope, creates unnamed storage).
+                Mutually exclusive with id and name.
            configuration: The configuration object containing API credentials and settings. Must include a valid
                `token` and `api_base_url`. May also contain a `default_dataset_id` for fallback when neither
-                `id` nor `name` is provided.
+                `id`, `name`, nor `alias` is provided.

        Returns:
            An instance for the opened or created storage client.

        Raises:
-            ValueError: If the configuration is missing required fields (token, api_base_url), if both `id` and `name`
-                are provided, or if neither `id` nor `name` is provided and no default storage ID is available in
-                the configuration.
+            ValueError: If the configuration is missing required fields (token, api_base_url), if more than one of
+                `id`, `name`, or `alias` is provided, or if none are provided and no default storage ID is available
+                in the configuration.
        """
+        if sum(1 for param in [id, name, alias] if param is not None) > 1:
+            raise ValueError('Only one of "id", "name", or "alias" can be specified, not multiple.')
+
        token = configuration.token
        if not token:
            raise ValueError(f'Apify storage client requires a valid token in Configuration (token={token}).')
@@ -115,27 +123,35 @@ class ApifyDatasetClient(DatasetClient):
        )
        apify_datasets_client = apify_client_async.datasets()

-        # If both id and name are provided, raise an error.
-        if id and name:
-            raise ValueError('Only one of "id" or "name" can be specified, not both.')
+        # Normalize 'default' alias to None
+        alias = None if alias == 'default' else alias

-        # If id is provided, get the storage by ID.
-        if id and name is None:
-            apify_dataset_client = apify_client_async.dataset(dataset_id=id)
+        # Handle alias resolution
+        if alias:
+            # Try to resolve alias to existing storage ID
+            resolved_id = await resolve_alias_to_id(alias, 'dataset', configuration)
+            if resolved_id:
+                id = resolved_id
+            else:
+                # Create a new storage and store the alias mapping
+                new_storage_metadata = DatasetMetadata.model_validate(
+                    await apify_datasets_client.get_or_create(),
+                )
+                id = new_storage_metadata.id
+                await store_alias_mapping(alias, 'dataset', id, configuration)

        # If name is provided, get or create the storage by name.
-        if name and id is None:
+        elif name:
            id = DatasetMetadata.model_validate(
                await apify_datasets_client.get_or_create(name=name),
            ).id
-            apify_dataset_client = apify_client_async.dataset(dataset_id=id)

-        # If both id and name are None, try to get the default storage ID from environment variables.
-        # The default storage ID environment variable is set by the Apify platform. It also contains
-        # a new storage ID after Actor's reboot or migration.
-        if id is None and name is None:
+        # If none are provided, try to get the default storage ID from environment variables.
+        elif id is None:
            id = configuration.default_dataset_id
-            apify_dataset_client = apify_client_async.dataset(dataset_id=id)
+
+        # Now create the client for the determined ID
+        apify_dataset_client = apify_client_async.dataset(dataset_id=id)

        # Fetch its metadata.
        metadata = await apify_dataset_client.get()
@@ -150,7 +166,7 @@ class ApifyDatasetClient(DatasetClient):
        # Verify that the storage exists by fetching its metadata again.
        metadata = await apify_dataset_client.get()
        if metadata is None:
-            raise ValueError(f'Opening dataset with id={id} and name={name} failed.')
+            raise ValueError(f'Opening dataset with id={id}, name={name}, and alias={alias} failed.')

        return cls(
            api_client=apify_dataset_client,
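
The dataset client above, and the key-value store and request queue clients that follow, all enforce the same argument contract. Reduced to a standalone illustration (not SDK code), the guard and the `'default'` normalization are simply:

    def ensure_single_selector(id: str | None, name: str | None, alias: str | None) -> str | None:
        """Reject calls passing more than one of id / name / alias; treat alias='default' as no alias."""
        if sum(1 for param in (id, name, alias) if param is not None) > 1:
            raise ValueError('Only one of "id", "name", or "alias" can be specified, not multiple.')
        return None if alias == 'default' else alias
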
apify/storage_clients/_apify/_key_value_store_client.py CHANGED
@@ -12,6 +12,7 @@ from crawlee.storage_clients._base import KeyValueStoreClient
from crawlee.storage_clients.models import KeyValueStoreRecord, KeyValueStoreRecordMetadata

from ._models import ApifyKeyValueStoreMetadata, KeyValueStoreListKeysPage
+from ._utils import resolve_alias_to_id, store_alias_mapping
from apify._crypto import create_hmac_signature

if TYPE_CHECKING:
@@ -58,6 +59,7 @@ class ApifyKeyValueStoreClient(KeyValueStoreClient):
        *,
        id: str | None,
        name: str | None,
+        alias: str | None,
        configuration: Configuration,
    ) -> ApifyKeyValueStoreClient:
        """Open an Apify key-value store client.
@@ -66,22 +68,27 @@ class ApifyKeyValueStoreClient(KeyValueStoreClient):
        It handles authentication, storage lookup/creation, and metadata retrieval.

        Args:
-            id: The ID of an existing key-value store to open. If provided, the client will connect to this specific
-                storage. Cannot be used together with `name`.
-            name: The name of a key-value store to get or create. If a storage with this name exists, it will be
-                opened; otherwise, a new one will be created. Cannot be used together with `id`.
+            id: The ID of the KVS to open. If provided, searches for existing KVS by ID.
+                Mutually exclusive with name and alias.
+            name: The name of the KVS to open (global scope, persists across runs).
+                Mutually exclusive with id and alias.
+            alias: The alias of the KVS to open (run scope, creates unnamed storage).
+                Mutually exclusive with id and name.
            configuration: The configuration object containing API credentials and settings. Must include a valid
                `token` and `api_base_url`. May also contain a `default_key_value_store_id` for fallback when
-                neither `id` nor `name` is provided.
+                neither `id`, `name`, nor `alias` is provided.

        Returns:
            An instance for the opened or created storage client.

        Raises:
-            ValueError: If the configuration is missing required fields (token, api_base_url), if both `id` and `name`
-                are provided, or if neither `id` nor `name` is provided and no default storage ID is available
+            ValueError: If the configuration is missing required fields (token, api_base_url), if more than one of
+                `id`, `name`, or `alias` is provided, or if none are provided and no default storage ID is available
                in the configuration.
        """
+        if sum(1 for param in [id, name, alias] if param is not None) > 1:
+            raise ValueError('Only one of "id", "name", or "alias" can be specified, not multiple.')
+
        token = configuration.token
        if not token:
            raise ValueError(f'Apify storage client requires a valid token in Configuration (token={token}).')
@@ -107,27 +114,35 @@ class ApifyKeyValueStoreClient(KeyValueStoreClient):
        )
        apify_kvss_client = apify_client_async.key_value_stores()

-        # If both id and name are provided, raise an error.
-        if id and name:
-            raise ValueError('Only one of "id" or "name" can be specified, not both.')
-
-        # If id is provided, get the storage by ID.
-        if id and name is None:
-            apify_kvs_client = apify_client_async.key_value_store(key_value_store_id=id)
+        # Normalize 'default' alias to None
+        alias = None if alias == 'default' else alias
+
+        # Handle alias resolution
+        if alias:
+            # Try to resolve alias to existing storage ID
+            resolved_id = await resolve_alias_to_id(alias, 'kvs', configuration)
+            if resolved_id:
+                id = resolved_id
+            else:
+                # Create a new storage and store the alias mapping
+                new_storage_metadata = ApifyKeyValueStoreMetadata.model_validate(
+                    await apify_kvss_client.get_or_create(),
+                )
+                id = new_storage_metadata.id
+                await store_alias_mapping(alias, 'kvs', id, configuration)

        # If name is provided, get or create the storage by name.
-        if name and id is None:
+        elif name:
            id = ApifyKeyValueStoreMetadata.model_validate(
                await apify_kvss_client.get_or_create(name=name),
            ).id
-            apify_kvs_client = apify_client_async.key_value_store(key_value_store_id=id)

-        # If both id and name are None, try to get the default storage ID from environment variables.
-        # The default storage ID environment variable is set by the Apify platform. It also contains
-        # a new storage ID after Actor's reboot or migration.
-        if id is None and name is None:
+        # If none are provided, try to get the default storage ID from environment variables.
+        elif id is None:
            id = configuration.default_key_value_store_id
-            apify_kvs_client = apify_client_async.key_value_store(key_value_store_id=id)
+
+        # Now create the client for the determined ID
+        apify_kvs_client = apify_client_async.key_value_store(key_value_store_id=id)

        # Fetch its metadata.
        metadata = await apify_kvs_client.get()
@@ -142,7 +157,7 @@ class ApifyKeyValueStoreClient(KeyValueStoreClient):
        # Verify that the storage exists by fetching its metadata again.
        metadata = await apify_kvs_client.get()
        if metadata is None:
-            raise ValueError(f'Opening key-value store with id={id} and name={name} failed.')
+            raise ValueError(f'Opening key-value store with id={id}, name={name}, and alias={alias} failed.')

        return cls(
            api_client=apify_kvs_client,
apify/storage_clients/_apify/_request_queue_client.py CHANGED
@@ -18,6 +18,7 @@ from crawlee.storage_clients._base import RequestQueueClient
from crawlee.storage_clients.models import AddRequestsResponse, ProcessedRequest, RequestQueueMetadata

from ._models import CachedRequest, ProlongRequestLockResponse, RequestQueueHead
+from ._utils import resolve_alias_to_id, store_alias_mapping
from apify import Request

if TYPE_CHECKING:
@@ -135,6 +136,7 @@ class ApifyRequestQueueClient(RequestQueueClient):
        *,
        id: str | None,
        name: str | None,
+        alias: str | None,
        configuration: Configuration,
    ) -> ApifyRequestQueueClient:
        """Open an Apify request queue client.
@@ -144,22 +146,27 @@ class ApifyRequestQueueClient(RequestQueueClient):
        management structures.

        Args:
-            id: The ID of an existing request queue to open. If provided, the client will connect to this specific
-                storage. Cannot be used together with `name`.
-            name: The name of a request queue to get or create. If a storage with this name exists, it will be opened;
-                otherwise, a new one will be created. Cannot be used together with `id`.
+            id: The ID of the RQ to open. If provided, searches for existing RQ by ID.
+                Mutually exclusive with name and alias.
+            name: The name of the RQ to open (global scope, persists across runs).
+                Mutually exclusive with id and alias.
+            alias: The alias of the RQ to open (run scope, creates unnamed storage).
+                Mutually exclusive with id and name.
            configuration: The configuration object containing API credentials and settings. Must include a valid
                `token` and `api_base_url`. May also contain a `default_request_queue_id` for fallback when neither
-                `id` nor `name` is provided.
+                `id`, `name`, nor `alias` is provided.

        Returns:
            An instance for the opened or created storage client.

        Raises:
-            ValueError: If the configuration is missing required fields (token, api_base_url), if both `id` and `name`
-                are provided, or if neither `id` nor `name` is provided and no default storage ID is available
+            ValueError: If the configuration is missing required fields (token, api_base_url), if more than one of
+                `id`, `name`, or `alias` is provided, or if none are provided and no default storage ID is available
                in the configuration.
        """
+        if sum(1 for param in [id, name, alias] if param is not None) > 1:
+            raise ValueError('Only one of "id", "name", or "alias" can be specified, not multiple.')
+
        token = configuration.token
        if not token:
            raise ValueError(f'Apify storage client requires a valid token in Configuration (token={token}).')
@@ -185,25 +192,32 @@ class ApifyRequestQueueClient(RequestQueueClient):
        )
        apify_rqs_client = apify_client_async.request_queues()

-        match (id, name):
-            case (None, None):
-                # If both id and name are None, try to get the default storage ID from environment variables.
-                # The default storage ID environment variable is set by the Apify platform. It also contains
-                # a new storage ID after Actor's reboot or migration.
-                id = configuration.default_request_queue_id
-            case (None, name):
-                # If only name is provided, get or create the storage by name.
-                id = RequestQueueMetadata.model_validate(
-                    await apify_rqs_client.get_or_create(name=name),
-                ).id
-            case (_, None):
-                # If only id is provided, use it.
-                pass
-            case (_, _):
-                # If both id and name are provided, raise an error.
-                raise ValueError('Only one of "id" or "name" can be specified, not both.')
-        if id is None:
-            raise RuntimeError('Unreachable code')
+        # Normalize 'default' alias to None
+        alias = None if alias == 'default' else alias
+
+        # Handle alias resolution
+        if alias:
+            # Try to resolve alias to existing storage ID
+            resolved_id = await resolve_alias_to_id(alias, 'rq', configuration)
+            if resolved_id:
+                id = resolved_id
+            else:
+                # Create a new storage and store the alias mapping
+                new_storage_metadata = RequestQueueMetadata.model_validate(
+                    await apify_rqs_client.get_or_create(),
+                )
+                id = new_storage_metadata.id
+                await store_alias_mapping(alias, 'rq', id, configuration)
+
+        # If name is provided, get or create the storage by name.
+        elif name:
+            id = RequestQueueMetadata.model_validate(
+                await apify_rqs_client.get_or_create(name=name),
+            ).id
+
+        # If none are provided, try to get the default storage ID from environment variables.
+        elif id is None:
+            id = configuration.default_request_queue_id

        # Use suitable client_key to make `hadMultipleClients` response of Apify API useful.
        # It should persist across migrated or resurrected Actor runs on the Apify platform.
@@ -227,7 +241,7 @@ class ApifyRequestQueueClient(RequestQueueClient):
        # Verify that the storage exists by fetching its metadata again.
        metadata = await apify_rq_client.get()
        if metadata is None:
-            raise ValueError(f'Opening request queue with id={id} and name={name} failed.')
+            raise ValueError(f'Opening request queue with id={id}, name={name}, and alias={alias} failed.')

        metadata_model = RequestQueueMetadata.model_validate(metadata)

apify/storage_clients/_apify/_storage_client.py CHANGED
@@ -25,6 +25,7 @@ class ApifyStorageClient(StorageClient):
        *,
        id: str | None = None,
        name: str | None = None,
+        alias: str | None = None,
        configuration: Configuration | None = None,
    ) -> ApifyDatasetClient:
        # Import here to avoid circular imports.
@@ -32,7 +33,7 @@ class ApifyStorageClient(StorageClient):

        configuration = configuration or ApifyConfiguration.get_global_configuration()
        if isinstance(configuration, ApifyConfiguration):
-            return await ApifyDatasetClient.open(id=id, name=name, configuration=configuration)
+            return await ApifyDatasetClient.open(id=id, name=name, alias=alias, configuration=configuration)

        raise TypeError(
            f'Expected "configuration" to be an instance of "apify.Configuration", '
@@ -45,6 +46,7 @@ class ApifyStorageClient(StorageClient):
        *,
        id: str | None = None,
        name: str | None = None,
+        alias: str | None = None,
        configuration: Configuration | None = None,
    ) -> ApifyKeyValueStoreClient:
        # Import here to avoid circular imports.
@@ -52,7 +54,7 @@ class ApifyStorageClient(StorageClient):

        configuration = configuration or ApifyConfiguration.get_global_configuration()
        if isinstance(configuration, ApifyConfiguration):
-            return await ApifyKeyValueStoreClient.open(id=id, name=name, configuration=configuration)
+            return await ApifyKeyValueStoreClient.open(id=id, name=name, alias=alias, configuration=configuration)

        raise TypeError(
            f'Expected "configuration" to be an instance of "apify.Configuration", '
@@ -65,6 +67,7 @@ class ApifyStorageClient(StorageClient):
        *,
        id: str | None = None,
        name: str | None = None,
+        alias: str | None = None,
        configuration: Configuration | None = None,
    ) -> ApifyRequestQueueClient:
        # Import here to avoid circular imports.
@@ -72,7 +75,7 @@ class ApifyStorageClient(StorageClient):

        configuration = configuration or ApifyConfiguration.get_global_configuration()
        if isinstance(configuration, ApifyConfiguration):
-            return await ApifyRequestQueueClient.open(id=id, name=name, configuration=configuration)
+            return await ApifyRequestQueueClient.open(id=id, name=name, alias=alias, configuration=configuration)

        raise TypeError(
            f'Expected "configuration" to be an instance of "apify.Configuration", '
apify/storage_clients/_apify/_utils.py ADDED
@@ -0,0 +1,117 @@
+from __future__ import annotations
+
+from logging import getLogger
+from typing import TYPE_CHECKING, Literal
+
+from apify_client import ApifyClientAsync
+
+if TYPE_CHECKING:
+    from apify_client.clients import KeyValueStoreClientAsync
+
+    from apify import Configuration
+
+logger = getLogger(__name__)
+
+_ALIAS_MAPPING_KEY = '__STORAGE_ALIASES_MAPPING'
+
+
+async def resolve_alias_to_id(
+    alias: str,
+    storage_type: Literal['dataset', 'kvs', 'rq'],
+    configuration: Configuration,
+) -> str | None:
+    """Resolve a storage alias to its corresponding storage ID.
+
+    Args:
+        alias: The alias to resolve.
+        storage_type: Type of storage ('dataset', 'key_value_store', or 'request_queue').
+        configuration: The configuration object containing API credentials.
+
+    Returns:
+        The storage ID if found, None if the alias doesn't exist.
+    """
+    default_kvs_client = await _get_default_kvs_client(configuration)
+
+    # Create the dictionary key for this alias.
+    alias_key = f'alias-{storage_type}-{alias}'
+
+    try:
+        record = await default_kvs_client.get_record(_ALIAS_MAPPING_KEY)
+
+        # get_record can return {key: ..., value: ..., content_type: ...}
+        if isinstance(record, dict) and 'value' in record:
+            record = record['value']
+
+        # Extract the actual data from the KVS record
+        if isinstance(record, dict) and alias_key in record:
+            storage_id = record[alias_key]
+            return str(storage_id)
+
+    except Exception as exc:
+        # If there's any error accessing the record, treat it as not found.
+        logger.warning(f'Error accessing alias mapping for {alias}: {exc}')
+
+    return None
+
+
+async def store_alias_mapping(
+    alias: str,
+    storage_type: Literal['dataset', 'kvs', 'rq'],
+    storage_id: str,
+    configuration: Configuration,
+) -> None:
+    """Store a mapping from alias to storage ID in the default key-value store.
+
+    Args:
+        alias: The alias to store.
+        storage_type: Type of storage ('dataset', 'key_value_store', or 'request_queue').
+        storage_id: The storage ID to map the alias to.
+        configuration: The configuration object containing API credentials.
+    """
+    default_kvs_client = await _get_default_kvs_client(configuration)
+
+    # Create the dictionary key for this alias.
+    alias_key = f'alias-{storage_type}-{alias}'
+
+    try:
+        record = await default_kvs_client.get_record(_ALIAS_MAPPING_KEY)
+
+        # get_record can return {key: ..., value: ..., content_type: ...}
+        if isinstance(record, dict) and 'value' in record:
+            record = record['value']
+
+        # Update or create the record with the new alias mapping
+        if isinstance(record, dict):
+            record[alias_key] = storage_id
+        else:
+            record = {alias_key: storage_id}
+
+        # Store the mapping back in the KVS.
+        await default_kvs_client.set_record(_ALIAS_MAPPING_KEY, record)
+    except Exception as exc:
+        logger.warning(f'Error accessing alias mapping for {alias}: {exc}')
+
+
+async def _get_default_kvs_client(configuration: Configuration) -> KeyValueStoreClientAsync:
+    """Get a client for the default key-value store."""
+    token = configuration.token
+    if not token:
+        raise ValueError(f'Apify storage client requires a valid token in Configuration (token={token}).')
+
+    api_url = configuration.api_base_url
+    if not api_url:
+        raise ValueError(f'Apify storage client requires a valid API URL in Configuration (api_url={api_url}).')
+
+    # Create Apify client with the provided token and API URL
+    apify_client_async = ApifyClientAsync(
+        token=token,
+        api_url=api_url,
+        max_retries=8,
+        min_delay_between_retries_millis=500,
+        timeout_secs=360,
+    )
+
+    # Get the default key-value store ID from configuration
+    default_kvs_id = configuration.default_key_value_store_id
+
+    return apify_client_async.key_value_store(key_value_store_id=default_kvs_id)
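
The helpers in `_utils.py` keep every alias in a single JSON record stored under the `__STORAGE_ALIASES_MAPPING` key of the run's default key-value store, with one entry per alias keyed as `alias-<storage_type>-<alias>`; `resolve_alias_to_id` reads one entry and `store_alias_mapping` merges a new one into the same record. Roughly, the stored value looks like this (the storage IDs are invented for illustration):

    # Shape of the __STORAGE_ALIASES_MAPPING record; the IDs are placeholders.
    STORAGE_ALIASES_MAPPING = {
        'alias-dataset-intermediate-results': 'fAkEDataSetId0001',
        'alias-kvs-session-cache': 'fAkEKvsId00000002',
        'alias-rq-to-crawl': 'fAkERqId000000003',
    }
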
apify/storage_clients/_file_system/_key_value_store_client.py CHANGED
@@ -1,4 +1,6 @@
import asyncio
+import json
+from pathlib import Path

from typing_extensions import override

@@ -23,9 +25,15 @@ class ApifyFileSystemKeyValueStoreClient(FileSystemKeyValueStoreClient):
        the `INPUT.json` file. It also updates the metadata to reflect that the store has been purged.
        """
        kvs_input_key = Configuration.get_global_configuration().input_key
+
+        # First try to find the alternative format of the input file and process it if it exists.
+        for file_path in self.path_to_kvs.glob('*'):
+            if file_path.name == f'{kvs_input_key}.json':
+                await self._process_input_json(file_path)
+
        async with self._lock:
            for file_path in self.path_to_kvs.glob('*'):
-                if file_path.name in {METADATA_FILENAME, f'{kvs_input_key}.json'}:
+                if file_path.name in {METADATA_FILENAME, kvs_input_key, f'{kvs_input_key}.{METADATA_FILENAME}'}:
                    continue
                if file_path.is_file():
                    await asyncio.to_thread(file_path.unlink, missing_ok=True)
@@ -34,3 +42,16 @@ class ApifyFileSystemKeyValueStoreClient(FileSystemKeyValueStoreClient):
            update_accessed_at=True,
            update_modified_at=True,
        )
+
+    async def _process_input_json(self, path: Path) -> None:
+        """Process simple input json file to format expected by the FileSystemKeyValueStoreClient.
+
+        For example: INPUT.json -> INPUT, INPUT.json.metadata
+        """
+        try:
+            f = await asyncio.to_thread(path.open)
+            input_data = json.load(f)
+        finally:
+            f.close()
+        await asyncio.to_thread(path.unlink, missing_ok=True)
+        await self.set_value(key=path.stem, value=input_data)
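
For orientation, the conversion that `_process_input_json` performs on a locally stored `INPUT.json` can be mirrored with the standard library alone. The sketch below only reproduces the read-delete-restore flow and is not the SDK implementation; the real client then persists the value through `set_value`, which writes the key file plus its metadata sidecar:

    import asyncio
    import json
    from pathlib import Path

    async def convert_plain_input_json(path: Path) -> tuple[str, object]:
        """Read a plain INPUT.json, remove it, and return the key and value to re-store."""
        # Load the raw JSON payload of the legacy single-file input format.
        input_data = json.loads(await asyncio.to_thread(path.read_text, encoding='utf-8'))
        # Delete the original file; the SDK re-stores the value under the file's stem ('INPUT').
        await asyncio.to_thread(path.unlink)
        return path.stem, input_data
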
apify/storage_clients/_file_system/_storage_client.py CHANGED
@@ -27,9 +27,15 @@ class ApifyFileSystemStorageClient(FileSystemStorageClient):
        *,
        id: str | None = None,
        name: str | None = None,
+        alias: str | None = None,
        configuration: Configuration | None = None,
    ) -> FileSystemKeyValueStoreClient:
        configuration = configuration or Configuration.get_global_configuration()
-        client = await ApifyFileSystemKeyValueStoreClient.open(id=id, name=name, configuration=configuration)
+        client = await ApifyFileSystemKeyValueStoreClient.open(
+            id=id,
+            name=name,
+            alias=alias,
+            configuration=configuration,
+        )
        await self._purge_if_needed(client, configuration)
        return client
apify-2.7.1b14.dist-info/METADATA → apify-2.7.1b16.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
Metadata-Version: 2.4
Name: apify
-Version: 2.7.1b14
+Version: 2.7.1b16
Summary: Apify SDK for Python
Project-URL: Apify Homepage, https://apify.com
Project-URL: Changelog, https://docs.apify.com/sdk/python/docs/changelog
@@ -228,7 +228,7 @@ Requires-Python: >=3.10
Requires-Dist: apify-client<3.0.0,>=2.0.0
Requires-Dist: apify-shared<3.0.0,>=2.0.0
Requires-Dist: cachetools>=5.5.0
-Requires-Dist: crawlee==1.0.0rc1
+Requires-Dist: crawlee==0.6.13b37
Requires-Dist: cryptography>=42.0.0
Requires-Dist: impit>=0.5.3
Requires-Dist: lazy-object-proxy>=1.11.0
apify-2.7.1b14.dist-info/RECORD → apify-2.7.1b16.dist-info/RECORD RENAMED
@@ -1,5 +1,5 @@
apify/__init__.py,sha256=HpgKg2FZWJuSPfDygzJ62psylhw4NN4tKFnoYUIhcd4,838
-apify/_actor.py,sha256=kOEvs_3TxjZ1PlwLEnIEL5nAD12oKs0ULldYyI-9irw,52985
+apify/_actor.py,sha256=i0pWWcysaLCZ64fJH9dznbpWF59BxG67QQjm3w0OO3s,53322
apify/_charging.py,sha256=KjZ2DnEMS0Tt8ibizmmt0RwBq8FOAsD1z-hKFgdazcY,13143
apify/_configuration.py,sha256=Ta-qPlKenLaI0IOlODg_A4ZwmPtnuS7OWLSJyexcqmA,13283
apify/_consts.py,sha256=CjhyEJ4Mi0lcIrzfqz8dN7nPJWGjCeBrrXQy1PZ6zRI,440
@@ -9,7 +9,7 @@ apify/_proxy_configuration.py,sha256=K9V4vG9-bAx7_a4l0zHhfbzvzopJeHek-qUJ05aQ6wI
apify/_utils.py,sha256=og_zzRXELQmirklJWp1kSV7pwQPFfAE81UO3IZ4xBNs,2414
apify/log.py,sha256=Ry251sK4qPRGiFGe3DKcOZazg5OzC6RXS546Zzk8H2M,1003
apify/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-apify/events/__init__.py,sha256=daZzFNJlJFnm0HZZQVeT26pvRWufjHzKZYERANgfX4o,182
+apify/events/__init__.py,sha256=fLNqlsM6AboUQrAxmb-GD1Pw6oDivN_eSAviGJtqc7c,198
apify/events/_apify_event_manager.py,sha256=yArFrKa4wWDZo32iwaA3F_w36VSJf1Yaj_L1opo8ncU,5917
apify/events/_types.py,sha256=F0BHgACqnRfmdQ9GUcpnZvPxzw2bdRr8BqbGSA4cHeQ,3050
apify/events/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -35,18 +35,19 @@ apify/scrapy/pipelines/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSu
apify/storage_clients/__init__.py,sha256=9WLAKs2GnnP0yyKR0mc3AfJ1IqXF48V3KPMp6KaB8kU,277
apify/storage_clients/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
apify/storage_clients/_apify/__init__.py,sha256=mtbVDsxqWL3kx30elnh0kAn2kZ4s3BBsWa15Y5e7RMU,347
-apify/storage_clients/_apify/_dataset_client.py,sha256=8ZQvbtXZm54-V0Ukio0Z4jVI2gGkfqzZ59GlBQJXGUU,11485
-apify/storage_clients/_apify/_key_value_store_client.py,sha256=WbyzDCFmJS2hd_7ddYL3JEO9zvjUAAE1D_F4kohiim4,9455
+apify/storage_clients/_apify/_dataset_client.py,sha256=tyx5QKbhyF9s2Y2gByVS2Pm3oE-MTvli6b8bkwQzMtk,12004
+apify/storage_clients/_apify/_key_value_store_client.py,sha256=gFt7AFIE6oAJ-g2rx7CG3r7ZWD0URUXqaw4Tk5zES0M,9916
apify/storage_clients/_apify/_models.py,sha256=C6FpXswtO6kXE5RUumazm_conzJJS6PrXAGF9XBuDb8,3651
-apify/storage_clients/_apify/_request_queue_client.py,sha256=LuKH_7Y9TMU1qtSagWRPsrb5aKcAIp3dkupS9W4615o,32117
-apify/storage_clients/_apify/_storage_client.py,sha256=5me6gHOeNAG3JaHxKRdzsZaa3FsqLDbObjhECGGWrr4,2890
+apify/storage_clients/_apify/_request_queue_client.py,sha256=Ti48ncuVQkrf6RyoROgYy6MzZ2Tsmi_lurm-2UpWWSs,32528
+apify/storage_clients/_apify/_storage_client.py,sha256=iU67040i7Tmb-qoVZGwcgTF_qZUxsGQpAeV_oIBrpNU,3031
+apify/storage_clients/_apify/_utils.py,sha256=5fy0WErl9HFy78qUPw-fS4jgxMFg-NJvJkuP58WGHuY,3992
apify/storage_clients/_apify/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
apify/storage_clients/_file_system/__init__.py,sha256=rDbXatXV9wHKPhKTrXDzWnexhTm7sIJQWucMi-P-SD4,130
-apify/storage_clients/_file_system/_key_value_store_client.py,sha256=DHDv_e0kFwhBCIXUadZAamHhUao3NsSniUEXgnttnVY,1430
-apify/storage_clients/_file_system/_storage_client.py,sha256=UwxuSvhbyQ7zR1db1hTmZ1h38yH7btHNp82X7e8MWWE,1290
+apify/storage_clients/_file_system/_key_value_store_client.py,sha256=fnSJ1EIOPCGfcE6e5S3Tux9VbnMVLCJjugkaQoH_9yo,2267
+apify/storage_clients/_file_system/_storage_client.py,sha256=rcwpKYlrWzvlSA2xoxftg-EZAi_iGZ3vOCbu0C5lKDE,1396
apify/storages/__init__.py,sha256=-9tEYJVabVs_eRVhUehxN58GH0UG8OfuGjGwuDieP2M,122
apify/storages/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-apify-2.7.1b14.dist-info/METADATA,sha256=hKa1ufnBfaSa3vq9dx0zm36o9q3o1iD5IqIH9P992y8,22579
-apify-2.7.1b14.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-apify-2.7.1b14.dist-info/licenses/LICENSE,sha256=AsFjHssKjj4LGd2ZCqXn6FBzMqcWdjQre1byPPSypVw,11355
-apify-2.7.1b14.dist-info/RECORD,,
+apify-2.7.1b16.dist-info/METADATA,sha256=l8r4yR5ZvjkJ80AHa1ph4zLqaabYTegxZtrGp9PCq5c,22580
+apify-2.7.1b16.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+apify-2.7.1b16.dist-info/licenses/LICENSE,sha256=AsFjHssKjj4LGd2ZCqXn6FBzMqcWdjQre1byPPSypVw,11355
+apify-2.7.1b16.dist-info/RECORD,,