crawlee 1.0.1b12__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,7 +9,7 @@ from pathlib import Path
9
9
  from typing import TYPE_CHECKING, Any
10
10
 
11
11
  from pydantic import ValidationError
12
- from typing_extensions import override
12
+ from typing_extensions import Self, override
13
13
 
14
14
  from crawlee._consts import METADATA_FILENAME
15
15
  from crawlee._utils.crypto import crypto_random_object_id
@@ -94,7 +94,7 @@ class FileSystemDatasetClient(DatasetClient):
94
94
  name: str | None,
95
95
  alias: str | None,
96
96
  configuration: Configuration,
97
- ) -> FileSystemDatasetClient:
97
+ ) -> Self:
98
98
  """Open or create a file system dataset client.
99
99
 
100
100
  This method attempts to open an existing dataset from the file system. If a dataset with the specified ID
@@ -10,7 +10,7 @@ from pathlib import Path
10
10
  from typing import TYPE_CHECKING, Any
11
11
 
12
12
  from pydantic import ValidationError
13
- from typing_extensions import override
13
+ from typing_extensions import Self, override
14
14
 
15
15
  from crawlee._consts import METADATA_FILENAME
16
16
  from crawlee._utils.crypto import crypto_random_object_id
@@ -93,7 +93,7 @@ class FileSystemKeyValueStoreClient(KeyValueStoreClient):
93
93
  name: str | None,
94
94
  alias: str | None,
95
95
  configuration: Configuration,
96
- ) -> FileSystemKeyValueStoreClient:
96
+ ) -> Self:
97
97
  """Open or create a file system key-value store client.
98
98
 
99
99
  This method attempts to open an existing key-value store from the file system. If a KVS with the specified
@@ -11,7 +11,7 @@ from pathlib import Path
11
11
  from typing import TYPE_CHECKING
12
12
 
13
13
  from pydantic import BaseModel, ValidationError
14
- from typing_extensions import override
14
+ from typing_extensions import Self, override
15
15
 
16
16
  from crawlee import Request
17
17
  from crawlee._consts import METADATA_FILENAME
@@ -144,7 +144,7 @@ class FileSystemRequestQueueClient(RequestQueueClient):
144
144
  name: str | None,
145
145
  alias: str | None,
146
146
  configuration: Configuration,
147
- ) -> FileSystemRequestQueueClient:
147
+ ) -> Self:
148
148
  """Open or create a file system request queue client.
149
149
 
150
150
  This method attempts to open an existing request queue from the file system. If a queue with the specified
@@ -4,7 +4,7 @@ from datetime import datetime, timezone
4
4
  from logging import getLogger
5
5
  from typing import TYPE_CHECKING, Any
6
6
 
7
- from typing_extensions import override
7
+ from typing_extensions import Self, override
8
8
 
9
9
  from crawlee._utils.crypto import crypto_random_object_id
10
10
  from crawlee._utils.raise_if_too_many_kwargs import raise_if_too_many_kwargs
@@ -55,7 +55,7 @@ class MemoryDatasetClient(DatasetClient):
55
55
  id: str | None,
56
56
  name: str | None,
57
57
  alias: str | None,
58
- ) -> MemoryDatasetClient:
58
+ ) -> Self:
59
59
  """Open or create a new memory dataset client.
60
60
 
61
61
  This method creates a new in-memory dataset instance. Unlike persistent storage implementations, memory
@@ -4,7 +4,7 @@ import sys
4
4
  from datetime import datetime, timezone
5
5
  from typing import TYPE_CHECKING, Any
6
6
 
7
- from typing_extensions import override
7
+ from typing_extensions import Self, override
8
8
 
9
9
  from crawlee._utils.crypto import crypto_random_object_id
10
10
  from crawlee._utils.file import infer_mime_type
@@ -53,7 +53,7 @@ class MemoryKeyValueStoreClient(KeyValueStoreClient):
53
53
  id: str | None,
54
54
  name: str | None,
55
55
  alias: str | None,
56
- ) -> MemoryKeyValueStoreClient:
56
+ ) -> Self:
57
57
  """Open or create a new memory key-value store client.
58
58
 
59
59
  This method creates a new in-memory key-value store instance. Unlike persistent storage implementations,
@@ -6,7 +6,7 @@ from datetime import datetime, timezone
6
6
  from logging import getLogger
7
7
  from typing import TYPE_CHECKING
8
8
 
9
- from typing_extensions import override
9
+ from typing_extensions import Self, override
10
10
 
11
11
  from crawlee import Request
12
12
  from crawlee._utils.crypto import crypto_random_object_id
@@ -65,7 +65,7 @@ class MemoryRequestQueueClient(RequestQueueClient):
65
65
  id: str | None,
66
66
  name: str | None,
67
67
  alias: str | None,
68
- ) -> MemoryRequestQueueClient:
68
+ ) -> Self:
69
69
  """Open or create a new memory request queue client.
70
70
 
71
71
  This method creates a new in-memory request queue instance. Unlike persistent storage implementations,
@@ -4,7 +4,7 @@ from logging import getLogger
4
4
  from typing import TYPE_CHECKING, Any
5
5
 
6
6
  from sqlalchemy import Select, insert, select
7
- from typing_extensions import override
7
+ from typing_extensions import Self, override
8
8
 
9
9
  from crawlee.storage_clients._base import DatasetClient
10
10
  from crawlee.storage_clients.models import DatasetItemsListPage, DatasetMetadata
@@ -78,7 +78,7 @@ class SqlDatasetClient(DatasetClient, SqlClientMixin):
78
78
  name: str | None,
79
79
  alias: str | None,
80
80
  storage_client: SqlStorageClient,
81
- ) -> SqlDatasetClient:
81
+ ) -> Self:
82
82
  """Open an existing dataset or create a new one.
83
83
 
84
84
  Args:
@@ -5,7 +5,7 @@ from logging import getLogger
5
5
  from typing import TYPE_CHECKING, Any
6
6
 
7
7
  from sqlalchemy import delete, select
8
- from typing_extensions import override
8
+ from typing_extensions import Self, override
9
9
 
10
10
  from crawlee._utils.file import infer_mime_type
11
11
  from crawlee.storage_clients._base import KeyValueStoreClient
@@ -77,7 +77,7 @@ class SqlKeyValueStoreClient(KeyValueStoreClient, SqlClientMixin):
77
77
  name: str | None,
78
78
  alias: str | None,
79
79
  storage_client: SqlStorageClient,
80
- ) -> SqlKeyValueStoreClient:
80
+ ) -> Self:
81
81
  """Open or create a SQL key-value store client.
82
82
 
83
83
  This method attempts to open an existing key-value store from the SQL database. If a KVS with the specified
@@ -10,7 +10,7 @@ from typing import TYPE_CHECKING, Any
10
10
  from sqlalchemy import func, or_, select, update
11
11
  from sqlalchemy.exc import SQLAlchemyError
12
12
  from sqlalchemy.orm import load_only
13
- from typing_extensions import NotRequired, override
13
+ from typing_extensions import NotRequired, Self, override
14
14
 
15
15
  from crawlee import Request
16
16
  from crawlee._utils.crypto import crypto_random_object_id
@@ -119,7 +119,7 @@ class SqlRequestQueueClient(RequestQueueClient, SqlClientMixin):
119
119
  name: str | None,
120
120
  alias: str | None,
121
121
  storage_client: SqlStorageClient,
122
- ) -> SqlRequestQueueClient:
122
+ ) -> Self:
123
123
  """Open an existing request queue or create a new one.
124
124
 
125
125
  This method first tries to find an existing queue by ID or name.
crawlee/storages/_base.py CHANGED
@@ -44,7 +44,9 @@ class Storage(ABC):
44
44
 
45
45
  Args:
46
46
  id: The storage ID.
47
- name: The storage name (global scope, persists across runs).
47
+ name: The storage name (global scope, persists across runs). Name can only contain letters "a" through "z",
48
+ the digits "0" through "9", and the hyphen ("-") but only in the middle of the string
49
+ (e.g. "my-value-1").
48
50
  alias: The storage alias (run scope, creates unnamed storage).
49
51
  configuration: Configuration object used during the storage creation or restoration process.
50
52
  storage_client: Underlying storage client to use. If not provided, the default global storage client
@@ -12,6 +12,7 @@ from crawlee._utils.file import export_csv_to_stream, export_json_to_stream
12
12
 
13
13
  from ._base import Storage
14
14
  from ._key_value_store import KeyValueStore
15
+ from ._utils import validate_storage_name
15
16
 
16
17
  if TYPE_CHECKING:
17
18
  from collections.abc import AsyncIterator
@@ -75,6 +76,8 @@ class Dataset(Storage):
75
76
  id: The unique identifier of the storage.
76
77
  name: The name of the storage, if available.
77
78
  """
79
+ validate_storage_name(name)
80
+
78
81
  self._client = client
79
82
  self._id = id
80
83
  self._name = name
@@ -15,6 +15,7 @@ from crawlee._utils.recoverable_state import RecoverableState
15
15
  from crawlee.storage_clients.models import KeyValueStoreMetadata
16
16
 
17
17
  from ._base import Storage
18
+ from ._utils import validate_storage_name
18
19
 
19
20
  if TYPE_CHECKING:
20
21
  from collections.abc import AsyncIterator
@@ -84,6 +85,8 @@ class KeyValueStore(Storage):
84
85
  id: The unique identifier of the storage.
85
86
  name: The name of the storage, if available.
86
87
  """
88
+ validate_storage_name(name)
89
+
87
90
  self._client = client
88
91
  self._id = id
89
92
  self._name = name
@@ -13,6 +13,7 @@ from crawlee._utils.wait import wait_for_all_tasks_for_finish
13
13
  from crawlee.request_loaders import RequestManager
14
14
 
15
15
  from ._base import Storage
16
+ from ._utils import validate_storage_name
16
17
 
17
18
  if TYPE_CHECKING:
18
19
  from collections.abc import Sequence
@@ -80,6 +81,8 @@ class RequestQueue(Storage, RequestManager):
80
81
  id: The unique identifier of the storage.
81
82
  name: The name of the storage, if available.
82
83
  """
84
+ validate_storage_name(name)
85
+
83
86
  self._client = client
84
87
  self._id = id
85
88
  self._name = name
@@ -8,6 +8,8 @@ from typing import TYPE_CHECKING, TypeVar
8
8
  from crawlee._utils.raise_if_too_many_kwargs import raise_if_too_many_kwargs
9
9
  from crawlee.storage_clients._base import DatasetClient, KeyValueStoreClient, RequestQueueClient
10
10
 
11
+ from ._utils import validate_storage_name
12
+
11
13
  if TYPE_CHECKING:
12
14
  from ._base import Storage
13
15
 
@@ -90,7 +92,9 @@ class StorageInstanceManager:
90
92
  Args:
91
93
  cls: The storage class to instantiate.
92
94
  id: Storage ID.
93
- name: Storage name. (global scope, persists across runs).
95
+ name: Storage name. (global scope, persists across runs). Name can only contain letters "a" through "z",
96
+ the digits "0" through "9", and the hyphen ("-") but only in the middle of the string
97
+ (e.g. "my-value-1").
94
98
  alias: Storage alias (run scope, creates unnamed storage).
95
99
  client_opener_coro: Coroutine to open the storage client when storage instance not found in cache.
96
100
  storage_client_cache_key: Additional optional key from storage client to differentiate cache entries.
@@ -146,6 +150,10 @@ class StorageInstanceManager:
146
150
  f'Use a different name or drop the existing alias storage first.'
147
151
  )
148
152
 
153
+ # Validate storage name
154
+ if name is not None:
155
+ validate_storage_name(name)
156
+
149
157
  # Create new instance
150
158
  client: KeyValueStoreClient | DatasetClient | RequestQueueClient
151
159
  client = await client_opener_coro
@@ -0,0 +1,11 @@
1
+ import re
2
+
3
+ NAME_REGEX = re.compile(r'^([a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9-]*[a-zA-Z0-9])$')
4
+
5
+
6
+ def validate_storage_name(name: str | None) -> None:
7
+ if name and not NAME_REGEX.match(name):
8
+ raise ValueError(
9
+ f'Invalid storage name "{name}". Name can only contain letters "a" through "z", the digits "0" through'
10
+ '"9", and the hyphen ("-") but only in the middle of the string (e.g. "my-value-1")'
11
+ )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: crawlee
3
- Version: 1.0.1b12
3
+ Version: 1.0.2
4
4
  Summary: Crawlee for Python
5
5
  Project-URL: Apify Homepage, https://apify.com
6
6
  Project-URL: Changelog, https://crawlee.dev/python/docs/changelog
@@ -232,7 +232,7 @@ Requires-Dist: more-itertools>=10.2.0
232
232
  Requires-Dist: protego>=0.5.0
233
233
  Requires-Dist: psutil>=6.0.0
234
234
  Requires-Dist: pydantic-settings!=2.7.0,!=2.7.1,!=2.8.0,>=2.2.0
235
- Requires-Dist: pydantic>=2.11.0
235
+ Requires-Dist: pydantic<2.12.0,>=2.11.0
236
236
  Requires-Dist: pyee>=9.0.0
237
237
  Requires-Dist: tldextract>=5.1.0
238
238
  Requires-Dist: typing-extensions>=4.1.0
@@ -159,35 +159,36 @@ crawlee/storage_clients/_base/_request_queue_client.py,sha256=cgM4yk6xJwgfzP-xaN
159
159
  crawlee/storage_clients/_base/_storage_client.py,sha256=RvmKCV1U9_KxyG7n8xhClm2vwD2SKChWIiBLk6cuqw0,3523
160
160
  crawlee/storage_clients/_base/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
161
161
  crawlee/storage_clients/_file_system/__init__.py,sha256=w3twfwz5YeLYeu_70pNPBRINS2wXRvzOMvA1hUDYgf0,387
162
- crawlee/storage_clients/_file_system/_dataset_client.py,sha256=aklivbj6MsFOLiG9muHVBpOJ3vTgLK-xb_SOHVqHWeM,17748
163
- crawlee/storage_clients/_file_system/_key_value_store_client.py,sha256=i3Jz7GEPoyi9DRz76TuKs4VMSzQL18rbGaF3voJG3JQ,18695
164
- crawlee/storage_clients/_file_system/_request_queue_client.py,sha256=2NN9you6sqfRonThcdqrEeXtU0I7YVMkxiiKJvU4TKQ,32880
162
+ crawlee/storage_clients/_file_system/_dataset_client.py,sha256=1Z8VCDx8ueh0FQQXUr8tJlOtKw8ggkaFjuz3-T_GJDY,17735
163
+ crawlee/storage_clients/_file_system/_key_value_store_client.py,sha256=qNa3RRJQ8Omy2AteQvYh1Td04PsP5AhUFyTpL6KQbSg,18676
164
+ crawlee/storage_clients/_file_system/_request_queue_client.py,sha256=0TM4BFcz2knQiWyF6dH62WTTj11wN9dNtpYOiBDqkpY,32862
165
165
  crawlee/storage_clients/_file_system/_storage_client.py,sha256=My63uc513kfUPe5X-PTYWBRe9xUGnkLqJN7IcsQd2yw,3293
166
166
  crawlee/storage_clients/_file_system/_utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
167
167
  crawlee/storage_clients/_file_system/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
168
168
  crawlee/storage_clients/_memory/__init__.py,sha256=WHyBhckxdw2k0epkM_B3ymNASebNTOCU_NrvfzUAn14,355
169
- crawlee/storage_clients/_memory/_dataset_client.py,sha256=3LW9rB4IDIggwL8299OHUEe_2Mz0DA30vdReGSO0dvc,8836
170
- crawlee/storage_clients/_memory/_key_value_store_client.py,sha256=0u2y7Tl88dx3Lg0h6QA1nDA06VZiUZW3820JV8_OcUI,6487
171
- crawlee/storage_clients/_memory/_request_queue_client.py,sha256=yYdv6r4O7x8JcQ2v-COit-9YyB79AxKvzOJkOCcMDaY,13110
169
+ crawlee/storage_clients/_memory/_dataset_client.py,sha256=wxuhyVnWyi9jUfI2eY4kSgD6vVje4gL2Bursto348Ps,8827
170
+ crawlee/storage_clients/_memory/_key_value_store_client.py,sha256=rBXRggALe-0kBAe03sdLVkABhkEFmHqXRabR28IugUE,6472
171
+ crawlee/storage_clients/_memory/_request_queue_client.py,sha256=hPI78S1sOopVKFKDRW7ndkha7TVcJfwdd4onXgfb4Pk,13096
172
172
  crawlee/storage_clients/_memory/_storage_client.py,sha256=EyiH-MgM_6iBItjmy2SkWAdjVbviacnxr3la-yiGfIw,2724
173
173
  crawlee/storage_clients/_memory/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
174
174
  crawlee/storage_clients/_sql/__init__.py,sha256=X_fDMc6jn50gEBZ9QyUw54sjovYfFvE-dgXAdci6Y2M,312
175
175
  crawlee/storage_clients/_sql/_client_mixin.py,sha256=U9ThDUuRbT5JDtCFlBurhZIs1Ay5t9fTfPXXI_4dwHY,15988
176
- crawlee/storage_clients/_sql/_dataset_client.py,sha256=jr3hD34qhGnV7teS56tJD8psUDhp3kT12Gk-HQ0MwT4,10209
176
+ crawlee/storage_clients/_sql/_dataset_client.py,sha256=tiJVvOPZgc7cy4kGfWnun-g2TJMHMdaLnoqns5Sl6ek,10203
177
177
  crawlee/storage_clients/_sql/_db_models.py,sha256=Gs4MS1YL0gWaUfNReVKJUXsqbU_d5jxiyvZ0sFxAV2A,9845
178
- crawlee/storage_clients/_sql/_key_value_store_client.py,sha256=1qwl298ThVybUJgfyQaKXMhWNOieEHa1TVeXWVb1YO0,11201
179
- crawlee/storage_clients/_sql/_request_queue_client.py,sha256=V2mUhQTe8pbEBb_4GNsI12i5Okr3ILLKyAHw8irdwLo,28936
178
+ crawlee/storage_clients/_sql/_key_value_store_client.py,sha256=D0nQoStq9PR0RTn9ZORKuTcRP7X-_2aDrLKgb2hKWM0,11189
179
+ crawlee/storage_clients/_sql/_request_queue_client.py,sha256=iavp-G62ApPtPmKePYviaNOFDXDg7QN9ozPHau6C1TY,28925
180
180
  crawlee/storage_clients/_sql/_storage_client.py,sha256=3xfgUcdW7Pu_j3SDYFzAdnU81jl1CmZ9Z5_NLvNi4P8,10913
181
181
  crawlee/storage_clients/_sql/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
182
182
  crawlee/storages/__init__.py,sha256=wc2eioyCKAAYrg4N7cshpjC-UbE23OzGar9nK_kteSY,186
183
- crawlee/storages/_base.py,sha256=Ka9RdYnEYlNLiKJGWqSwEDOPv5AtJWXKZErSTesh42Y,2124
184
- crawlee/storages/_dataset.py,sha256=T2C8Q3gDUWg8p0JBS5MHnCuSd252oDU61JuNZGZzg-I,14573
185
- crawlee/storages/_key_value_store.py,sha256=3oI5hVoM_NpTQVKXCbQCmb0sZhW7vN2oXQo-Yxi7BLQ,10127
186
- crawlee/storages/_request_queue.py,sha256=jt-d-NkI9lAorLssoI2r_lZjeEipe-5Cn6z9bfQqY3k,13154
187
- crawlee/storages/_storage_instance_manager.py,sha256=iFX3ymsIXyTg8tMHtx5Wn9XyaC77dIf15GpuggsJPDM,7821
183
+ crawlee/storages/_base.py,sha256=zUOcMJTg8MAzq-m9X1NJcWncCfxzI5mb5MyY35WAkMk,2310
184
+ crawlee/storages/_dataset.py,sha256=l3VJCaJnaAEhJFpfRUOLzIbW332R8gdEPSSGhLq65pg,14652
185
+ crawlee/storages/_key_value_store.py,sha256=ik--ZPCzOiG5hmm6k5LNH_FO9P3MoW0UvTKiGVf1RIY,10206
186
+ crawlee/storages/_request_queue.py,sha256=bjBOGbpMaGUsqJPVB-JD2VShziPAYMI-GvWKKpylzDE,13233
187
+ crawlee/storages/_storage_instance_manager.py,sha256=72n0YlPwNpSQDJSPf4TxnI2GvIK6L-ZiTmHRbFcoVU0,8164
188
+ crawlee/storages/_utils.py,sha256=Yz-5tEBYKYCFJemYT29--uGJqoJLApLDLgPcsnbifRw,439
188
189
  crawlee/storages/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
189
- crawlee-1.0.1b12.dist-info/METADATA,sha256=e_iELUmxHTEAh14KViZnMgiKSnNe4nzGAlBDel2AO0I,29315
190
- crawlee-1.0.1b12.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
191
- crawlee-1.0.1b12.dist-info/entry_points.txt,sha256=1p65X3dA-cYvzjtlxLL6Kn1wpY-3uEDVqJLp53uNPeo,45
192
- crawlee-1.0.1b12.dist-info/licenses/LICENSE,sha256=AsFjHssKjj4LGd2ZCqXn6FBzMqcWdjQre1byPPSypVw,11355
193
- crawlee-1.0.1b12.dist-info/RECORD,,
190
+ crawlee-1.0.2.dist-info/METADATA,sha256=eep5-BK6U8RekRkE33CNSr-A3LlSvuZ9vujpHJJvu-o,29320
191
+ crawlee-1.0.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
192
+ crawlee-1.0.2.dist-info/entry_points.txt,sha256=1p65X3dA-cYvzjtlxLL6Kn1wpY-3uEDVqJLp53uNPeo,45
193
+ crawlee-1.0.2.dist-info/licenses/LICENSE,sha256=AsFjHssKjj4LGd2ZCqXn6FBzMqcWdjQre1byPPSypVw,11355
194
+ crawlee-1.0.2.dist-info/RECORD,,