apify 3.0.3__py3-none-any.whl → 3.0.4b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of apify might be problematic. Click here for more details.

apify/_actor.py CHANGED
@@ -48,7 +48,6 @@ if TYPE_CHECKING:
48
48
  from typing_extensions import Self
49
49
 
50
50
  from crawlee.proxy_configuration import _NewUrlFunction
51
- from crawlee.storage_clients import StorageClient
52
51
 
53
52
  from apify._models import Webhook
54
53
 
@@ -140,7 +139,6 @@ class _ActorType:
140
139
  # `__init__` method should not be considered final.
141
140
 
142
141
  self._apify_client: ApifyClientAsync | None = None
143
- self._local_storage_client: StorageClient | None = None
144
142
 
145
143
  self._is_exiting = False
146
144
  self._is_initialized = False
@@ -120,7 +120,7 @@ class RequestQueueStats(BaseModel):
120
120
  """The number of request queue reads."""
121
121
 
122
122
  storage_bytes: Annotated[int, Field(alias='storageBytes', default=0)]
123
- """Storage size in Bytes."""
123
+ """Storage size in bytes."""
124
124
 
125
125
  write_count: Annotated[int, Field(alias='writeCount', default=0)]
126
126
  """The number of request queue writes."""
@@ -21,43 +21,59 @@ if TYPE_CHECKING:
21
21
 
22
22
  @docs_group('Storage clients')
23
23
  class ApifyStorageClient(StorageClient):
24
- """Apify storage client."""
24
+ """Apify platform implementation of the storage client.
25
+
26
+ This storage client provides access to datasets, key-value stores, and request queues that persist data
27
+ to the Apify platform. Each storage type is implemented with its own specific Apify client that stores data
28
+ in the cloud, making it accessible from anywhere.
29
+
30
+ The communication with the Apify platform is handled via the Apify API client for Python, which is an HTTP API
31
+ wrapper. For maximum efficiency and performance of the storage clients, various caching mechanisms are used to
32
+ minimize the number of API calls made to the Apify platform. Data can be inspected and manipulated through
33
+ the Apify console web interface or via the Apify API.
34
+
35
+ The request queue client supports two access modes controlled by the `request_queue_access` parameter:
36
+
37
+ ### Single mode
38
+
39
+ The `single` mode is optimized for scenarios with only one consumer. It minimizes API calls, making it faster
40
+ and more cost-efficient compared to the `shared` mode. This option is ideal when a single Actor is responsible
41
+ for consuming the entire request queue. Using multiple consumers simultaneously may lead to inconsistencies
42
+ or unexpected behavior.
43
+
44
+ In this mode, multiple producers can safely add new requests, but forefront requests may not be processed
45
+ immediately, as the client relies on local head estimation instead of frequent forefront fetching. Requests can
46
+ also be added or marked as handled by other clients, but they must not be deleted or modified, since such changes
47
+ would not be reflected in the local cache. If a request is already fully cached locally, marking it as handled
48
+ by another client will be ignored by this client. This does not cause errors but can occasionally result in
49
+ reprocessing a request that was already handled elsewhere. If the request was not yet cached locally, marking
50
+ it as handled poses no issue.
51
+
52
+ ### Shared mode
53
+
54
+ The `shared` mode is designed for scenarios with multiple concurrent consumers. It ensures proper synchronization
55
+ and consistency across clients, at the cost of higher API usage and slightly worse performance. This mode is safe
56
+ for concurrent access from multiple processes, including Actors running in parallel on the Apify platform. It
57
+ should be used when multiple consumers need to process requests from the same queue simultaneously.
58
+ """
59
+
60
+ _LSP_ERROR_MSG = 'Expected "configuration" to be an instance of "apify.Configuration", but got {} instead.'
61
+ """This class (intentionally) violates the Liskov Substitution Principle.
62
+
63
+ It requires a specialized `Configuration` instance compared to its parent class.
64
+ """
25
65
 
26
66
  def __init__(self, *, request_queue_access: Literal['single', 'shared'] = 'single') -> None:
27
- """Initialize the Apify storage client.
67
+ """Initialize a new instance.
28
68
 
29
69
  Args:
30
- request_queue_access: Controls the implementation of the request queue client based on expected scenario:
31
- - 'single' is suitable for single consumer scenarios. It makes less API calls, is cheaper and faster.
32
- - 'shared' is suitable for multiple consumers scenarios at the cost of higher API usage.
33
- Detailed constraints for the 'single' access type:
34
- - Only one client is consuming the request queue at the time.
35
- - Multiple producers can put requests to the queue, but their forefront requests are not guaranteed to
36
- be handled so quickly as this client does not aggressively fetch the forefront and relies on local
37
- head estimation.
38
- - Requests are only added to the queue, never deleted by other clients. (Marking as handled is ok.)
39
- - Other producers can add new requests, but not modify existing ones.
40
- (Modifications would not be included in local cache)
70
+ request_queue_access: Defines how the request queue client behaves. Use `single` mode for a single
71
+ consumer. It has fewer API calls, meaning better performance and lower costs. If you need multiple
72
+ concurrent consumers, use `shared` mode, but expect worse performance and higher costs due to
73
+ the additional overhead.
41
74
  """
42
75
  self._request_queue_access = request_queue_access
43
76
 
44
- # This class breaches Liskov Substitution Principle. It requires specialized Configuration compared to its parent.
45
- _lsp_violation_error_message_template = (
46
- 'Expected "configuration" to be an instance of "apify.Configuration", but got {} instead.'
47
- )
48
-
49
- @override
50
- def get_storage_client_cache_key(self, configuration: CrawleeConfiguration) -> Hashable:
51
- if isinstance(configuration, ApifyConfiguration):
52
- # It is not supported to open exactly same queue with 'single' and 'shared' client at the same time.
53
- # Whichever client variation gets used first, wins.
54
- return super().get_storage_client_cache_key(configuration), hash_api_base_url_and_token(configuration)
55
-
56
- config_class = type(configuration)
57
- raise TypeError(
58
- self._lsp_violation_error_message_template.format(f'{config_class.__module__}.{config_class.__name__}')
59
- )
60
-
61
77
  @override
62
78
  async def create_dataset_client(
63
79
  self,
@@ -71,7 +87,7 @@ class ApifyStorageClient(StorageClient):
71
87
  if isinstance(configuration, ApifyConfiguration):
72
88
  return await ApifyDatasetClient.open(id=id, name=name, alias=alias, configuration=configuration)
73
89
 
74
- raise TypeError(self._lsp_violation_error_message_template.format(type(configuration).__name__))
90
+ raise TypeError(self._LSP_ERROR_MSG.format(type(configuration).__name__))
75
91
 
76
92
  @override
77
93
  async def create_kvs_client(
@@ -86,7 +102,7 @@ class ApifyStorageClient(StorageClient):
86
102
  if isinstance(configuration, ApifyConfiguration):
87
103
  return await ApifyKeyValueStoreClient.open(id=id, name=name, alias=alias, configuration=configuration)
88
104
 
89
- raise TypeError(self._lsp_violation_error_message_template.format(type(configuration).__name__))
105
+ raise TypeError(self._LSP_ERROR_MSG.format(type(configuration).__name__))
90
106
 
91
107
  @override
92
108
  async def create_rq_client(
@@ -103,4 +119,14 @@ class ApifyStorageClient(StorageClient):
103
119
  id=id, name=name, alias=alias, configuration=configuration, access=self._request_queue_access
104
120
  )
105
121
 
106
- raise TypeError(self._lsp_violation_error_message_template.format(type(configuration).__name__))
122
+ raise TypeError(self._LSP_ERROR_MSG.format(type(configuration).__name__))
123
+
124
+ @override
125
+ def get_storage_client_cache_key(self, configuration: CrawleeConfiguration) -> Hashable:
126
+ if isinstance(configuration, ApifyConfiguration):
127
+ # It is not supported to open exactly same queue with 'single' and 'shared' client at the same time.
128
+ # Whichever client variation gets used first, wins.
129
+ return super().get_storage_client_cache_key(configuration), hash_api_base_url_and_token(configuration)
130
+
131
+ config_class = type(configuration)
132
+ raise TypeError(self._LSP_ERROR_MSG.format(f'{config_class.__module__}.{config_class.__name__}'))
@@ -8,8 +8,7 @@ from crawlee.storage_clients._base import DatasetClient, KeyValueStoreClient, Re
8
8
 
9
9
  from apify._configuration import Configuration as ApifyConfiguration
10
10
  from apify._utils import docs_group
11
- from apify.storage_clients import ApifyStorageClient
12
- from apify.storage_clients._file_system import ApifyFileSystemStorageClient
11
+ from apify.storage_clients import ApifyStorageClient, FileSystemStorageClient
13
12
 
14
13
  if TYPE_CHECKING:
15
14
  from collections.abc import Hashable
@@ -19,28 +18,36 @@ if TYPE_CHECKING:
19
18
 
20
19
  @docs_group('Storage clients')
21
20
  class SmartApifyStorageClient(StorageClient):
22
- """SmartApifyStorageClient that delegates to cloud_storage_client or local_storage_client.
21
+ """Storage client that automatically selects cloud or local storage client based on the environment.
23
22
 
24
- When running on Apify platform use cloud_storage_client, else use local_storage_client. This storage client is
25
- designed to work specifically in Actor context.
23
+ This storage client provides access to datasets, key-value stores, and request queues by intelligently
24
+ delegating to either the cloud or local storage client based on the execution environment and configuration.
25
+
26
+ When running on the Apify platform (which is detected via environment variables), this client automatically
27
+ uses the `cloud_storage_client` to store storage data there. When running locally, it uses the
28
+ `local_storage_client` to store the data locally instead. You can also force cloud storage usage from your
29
+ local machine by using the `force_cloud` argument.
30
+
31
+ This storage client is designed to work specifically in `Actor` context and provides a seamless development
32
+ experience where the same code works both locally and on the Apify platform without any changes.
26
33
  """
27
34
 
28
35
  def __init__(
29
36
  self,
30
37
  *,
31
- cloud_storage_client: ApifyStorageClient | None = None,
38
+ cloud_storage_client: StorageClient | None = None,
32
39
  local_storage_client: StorageClient | None = None,
33
40
  ) -> None:
34
- """Initialize the Apify storage client.
41
+ """Initialize a new instance.
35
42
 
36
43
  Args:
37
- cloud_storage_client: Client used to communicate with the Apify platform storage. Either through
38
- `force_cloud` argument when opening storages or automatically when running on the Apify platform.
39
- local_storage_client: Client used to communicate with the storage when not running on the Apify
40
- platform and not using `force_cloud` argument when opening storages.
44
+ cloud_storage_client: Storage client used when an Actor is running on the Apify platform, or when
45
+ explicitly enabled via the `force_cloud` argument. Defaults to `ApifyStorageClient`.
46
+ local_storage_client: Storage client used when an Actor is not running on the Apify platform and when
47
+ `force_cloud` flag is not set. Defaults to `FileSystemStorageClient`.
41
48
  """
42
- self._cloud_storage_client = cloud_storage_client or ApifyStorageClient(request_queue_access='single')
43
- self._local_storage_client = local_storage_client or ApifyFileSystemStorageClient()
49
+ self._cloud_storage_client = cloud_storage_client or ApifyStorageClient()
50
+ self._local_storage_client = local_storage_client or FileSystemStorageClient()
44
51
 
45
52
  def __str__(self) -> str:
46
53
  return (
@@ -48,26 +55,6 @@ class SmartApifyStorageClient(StorageClient):
48
55
  f' local_storage_client={self._local_storage_client.__class__.__name__})'
49
56
  )
50
57
 
51
- def get_suitable_storage_client(self, *, force_cloud: bool = False) -> StorageClient:
52
- """Get a suitable storage client based on the global configuration and the value of the force_cloud flag.
53
-
54
- Args:
55
- force_cloud: If True, return `cloud_storage_client`.
56
- """
57
- if ApifyConfiguration.get_global_configuration().is_at_home:
58
- return self._cloud_storage_client
59
-
60
- configuration = ApifyConfiguration.get_global_configuration()
61
- if force_cloud:
62
- if configuration.token is None:
63
- raise RuntimeError(
64
- 'In order to use the Apify cloud storage from your computer, '
65
- 'you need to provide an Apify token using the APIFY_TOKEN environment variable.'
66
- )
67
- return self._cloud_storage_client
68
-
69
- return self._local_storage_client
70
-
71
58
  @override
72
59
  def get_storage_client_cache_key(self, configuration: CrawleeConfiguration) -> Hashable:
73
60
  if ApifyConfiguration.get_global_configuration().is_at_home:
@@ -115,3 +102,23 @@ class SmartApifyStorageClient(StorageClient):
115
102
  return await self.get_suitable_storage_client().create_rq_client(
116
103
  id=id, name=id, alias=alias, configuration=configuration
117
104
  )
105
+
106
+ def get_suitable_storage_client(self, *, force_cloud: bool = False) -> StorageClient:
107
+ """Get a suitable storage client based on the global configuration and the value of the force_cloud flag.
108
+
109
+ Args:
110
+ force_cloud: If True, return `cloud_storage_client`.
111
+ """
112
+ if ApifyConfiguration.get_global_configuration().is_at_home:
113
+ return self._cloud_storage_client
114
+
115
+ configuration = ApifyConfiguration.get_global_configuration()
116
+ if force_cloud:
117
+ if configuration.token is None:
118
+ raise RuntimeError(
119
+ 'In order to use the Apify cloud storage from your computer, '
120
+ 'you need to provide an Apify token using the APIFY_TOKEN environment variable.'
121
+ )
122
+ return self._cloud_storage_client
123
+
124
+ return self._local_storage_client
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: apify
3
- Version: 3.0.3
3
+ Version: 3.0.4b1
4
4
  Summary: Apify SDK for Python
5
5
  Project-URL: Apify Homepage, https://apify.com
6
6
  Project-URL: Changelog, https://docs.apify.com/sdk/python/docs/changelog
@@ -1,5 +1,5 @@
1
1
  apify/__init__.py,sha256=HpgKg2FZWJuSPfDygzJ62psylhw4NN4tKFnoYUIhcd4,838
2
- apify/_actor.py,sha256=kfrwD8gaeN4NcdNMD_Pj66agNh78jJjwMuNOuwLdo-E,57370
2
+ apify/_actor.py,sha256=eJzlRicWkVuO-151ikjJnRMRl-UgJ2QKg6wahtoc2Rc,57252
3
3
  apify/_charging.py,sha256=KjZ2DnEMS0Tt8ibizmmt0RwBq8FOAsD1z-hKFgdazcY,13143
4
4
  apify/_configuration.py,sha256=7ZHhgRp98kr35zx4k4EB2aImq7Dq1FJjPg7r5bucv_M,14984
5
5
  apify/_consts.py,sha256=CjhyEJ4Mi0lcIrzfqz8dN7nPJWGjCeBrrXQy1PZ6zRI,440
@@ -37,21 +37,21 @@ apify/storage_clients/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuF
37
37
  apify/storage_clients/_apify/__init__.py,sha256=mtbVDsxqWL3kx30elnh0kAn2kZ4s3BBsWa15Y5e7RMU,347
38
38
  apify/storage_clients/_apify/_dataset_client.py,sha256=qmCJyL1MN83tYRXmc31P6yMIXVZMyRrGjr7R6-86FSE,11869
39
39
  apify/storage_clients/_apify/_key_value_store_client.py,sha256=994a5bM_BGHIeirnny6QlXjy5CzMU2I9SmMksCbHCUY,9357
40
- apify/storage_clients/_apify/_models.py,sha256=XxBru5XFdj0jqX6V-uVahT-pMQU3pZ501aTNzXCuoMU,4556
40
+ apify/storage_clients/_apify/_models.py,sha256=szYdJOvWQ6hrmwUp7y7QsoQrRh5TlIYNxmO7nOe-M14,4556
41
41
  apify/storage_clients/_apify/_request_queue_client.py,sha256=tAyap34gpxvPiQ0McDjX5ojq1ZIZc4EI3PrW8VQqS4k,13292
42
42
  apify/storage_clients/_apify/_request_queue_shared_client.py,sha256=pWmd6aPxM-eZ6PC1MfsfTcjD2mGGpCDS3ZZ3cG_2MEA,20971
43
43
  apify/storage_clients/_apify/_request_queue_single_client.py,sha256=d2txMwxW1nlYnvjdOH8xpxhcOYNeyc1ousGHRE7jsPg,17468
44
- apify/storage_clients/_apify/_storage_client.py,sha256=hFl_PuX1UgOydBD6pieZ0u2NWbDmZV-i0qygKdsuHt4,4873
44
+ apify/storage_clients/_apify/_storage_client.py,sha256=TcmMzbEMgyndxaT6lIMl2fTG4oQR_lpLSHTmwwH0tXs,6515
45
45
  apify/storage_clients/_apify/_utils.py,sha256=375gk_TJyMWIIgRbE9SS0hQup0h6sA3mzpTG53XIjkM,8769
46
46
  apify/storage_clients/_apify/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
47
47
  apify/storage_clients/_file_system/__init__.py,sha256=rDbXatXV9wHKPhKTrXDzWnexhTm7sIJQWucMi-P-SD4,130
48
48
  apify/storage_clients/_file_system/_key_value_store_client.py,sha256=gxM3ap67PnY80Rd7P3onPAf2pksYpU0LoAlJdayEMdU,4179
49
49
  apify/storage_clients/_file_system/_storage_client.py,sha256=rcwpKYlrWzvlSA2xoxftg-EZAi_iGZ3vOCbu0C5lKDE,1396
50
50
  apify/storage_clients/_smart_apify/__init__.py,sha256=614B2AaWY-dx6RQ6mod7VVR8gFh75-_jnq5BeDD7hSc,53
51
- apify/storage_clients/_smart_apify/_storage_client.py,sha256=GCPmVe_xWAFcO2Cuej4su4i97_d33Q9Ih_Sc5xW2Wa4,4674
51
+ apify/storage_clients/_smart_apify/_storage_client.py,sha256=ZNNY4Qm9Cx_UFqBaforT28gC4hhOnCcKWpUYCIvzj48,5218
52
52
  apify/storages/__init__.py,sha256=-9tEYJVabVs_eRVhUehxN58GH0UG8OfuGjGwuDieP2M,122
53
53
  apify/storages/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
54
- apify-3.0.3.dist-info/METADATA,sha256=BghvJ18oGZT-KqyoV3wlC6WdJ-rVupfzSUaWXLix2-k,22580
55
- apify-3.0.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
56
- apify-3.0.3.dist-info/licenses/LICENSE,sha256=AsFjHssKjj4LGd2ZCqXn6FBzMqcWdjQre1byPPSypVw,11355
57
- apify-3.0.3.dist-info/RECORD,,
54
+ apify-3.0.4b1.dist-info/METADATA,sha256=Y3LBVJMWeWazp-foQTEHw-5kD1AGlTkmOPRwJZ2KAcU,22582
55
+ apify-3.0.4b1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
56
+ apify-3.0.4b1.dist-info/licenses/LICENSE,sha256=AsFjHssKjj4LGd2ZCqXn6FBzMqcWdjQre1byPPSypVw,11355
57
+ apify-3.0.4b1.dist-info/RECORD,,
File without changes