crawlee 1.0.2b2__py3-none-any.whl → 1.0.2b4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of crawlee might be problematic. Click here for more details.

crawlee/storages/_base.py CHANGED
@@ -44,7 +44,9 @@ class Storage(ABC):
44
44
 
45
45
  Args:
46
46
  id: The storage ID.
47
- name: The storage name (global scope, persists across runs).
47
+ name: The storage name (global scope, persists across runs). Name can only contain letters "a" through "z",
48
+ the digits "0" through "9", and the hyphen ("-") but only in the middle of the string
49
+ (e.g. "my-value-1").
48
50
  alias: The storage alias (run scope, creates unnamed storage).
49
51
  configuration: Configuration object used during the storage creation or restoration process.
50
52
  storage_client: Underlying storage client to use. If not provided, the default global storage client
@@ -12,6 +12,7 @@ from crawlee._utils.file import export_csv_to_stream, export_json_to_stream
12
12
 
13
13
  from ._base import Storage
14
14
  from ._key_value_store import KeyValueStore
15
+ from ._utils import validate_storage_name
15
16
 
16
17
  if TYPE_CHECKING:
17
18
  from collections.abc import AsyncIterator
@@ -75,6 +76,8 @@ class Dataset(Storage):
75
76
  id: The unique identifier of the storage.
76
77
  name: The name of the storage, if available.
77
78
  """
79
+ validate_storage_name(name)
80
+
78
81
  self._client = client
79
82
  self._id = id
80
83
  self._name = name
@@ -15,6 +15,7 @@ from crawlee._utils.recoverable_state import RecoverableState
15
15
  from crawlee.storage_clients.models import KeyValueStoreMetadata
16
16
 
17
17
  from ._base import Storage
18
+ from ._utils import validate_storage_name
18
19
 
19
20
  if TYPE_CHECKING:
20
21
  from collections.abc import AsyncIterator
@@ -84,6 +85,8 @@ class KeyValueStore(Storage):
84
85
  id: The unique identifier of the storage.
85
86
  name: The name of the storage, if available.
86
87
  """
88
+ validate_storage_name(name)
89
+
87
90
  self._client = client
88
91
  self._id = id
89
92
  self._name = name
@@ -13,6 +13,7 @@ from crawlee._utils.wait import wait_for_all_tasks_for_finish
13
13
  from crawlee.request_loaders import RequestManager
14
14
 
15
15
  from ._base import Storage
16
+ from ._utils import validate_storage_name
16
17
 
17
18
  if TYPE_CHECKING:
18
19
  from collections.abc import Sequence
@@ -80,6 +81,8 @@ class RequestQueue(Storage, RequestManager):
80
81
  id: The unique identifier of the storage.
81
82
  name: The name of the storage, if available.
82
83
  """
84
+ validate_storage_name(name)
85
+
83
86
  self._client = client
84
87
  self._id = id
85
88
  self._name = name
@@ -8,6 +8,8 @@ from typing import TYPE_CHECKING, TypeVar
8
8
  from crawlee._utils.raise_if_too_many_kwargs import raise_if_too_many_kwargs
9
9
  from crawlee.storage_clients._base import DatasetClient, KeyValueStoreClient, RequestQueueClient
10
10
 
11
+ from ._utils import validate_storage_name
12
+
11
13
  if TYPE_CHECKING:
12
14
  from ._base import Storage
13
15
 
@@ -90,7 +92,9 @@ class StorageInstanceManager:
90
92
  Args:
91
93
  cls: The storage class to instantiate.
92
94
  id: Storage ID.
93
- name: Storage name. (global scope, persists across runs).
95
+ name: Storage name (global scope, persists across runs). Name can only contain letters "a" through "z",
96
+ the digits "0" through "9", and the hyphen ("-") but only in the middle of the string
97
+ (e.g. "my-value-1").
94
98
  alias: Storage alias (run scope, creates unnamed storage).
95
99
  client_opener_coro: Coroutine to open the storage client when storage instance not found in cache.
96
100
  storage_client_cache_key: Additional optional key from storage client to differentiate cache entries.
@@ -146,6 +150,10 @@ class StorageInstanceManager:
146
150
  f'Use a different name or drop the existing alias storage first.'
147
151
  )
148
152
 
153
+ # Validate storage name
154
+ if name is not None:
155
+ validate_storage_name(name)
156
+
149
157
  # Create new instance
150
158
  client: KeyValueStoreClient | DatasetClient | RequestQueueClient
151
159
  client = await client_opener_coro
@@ -0,0 +1,11 @@
1
+ import re
2
+
3
+ NAME_REGEX = re.compile(r'^([a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9-]*[a-zA-Z0-9])$')
4
+
5
+
6
+ def validate_storage_name(name: str | None) -> None:
7
+ if name and not NAME_REGEX.match(name):
8
+ raise ValueError(
9
+ f'Invalid storage name "{name}". Name can only contain letters "a" through "z", the digits "0" through'
10
+ '"9", and the hyphen ("-") but only in the middle of the string (e.g. "my-value-1")'
11
+ )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: crawlee
3
- Version: 1.0.2b2
3
+ Version: 1.0.2b4
4
4
  Summary: Crawlee for Python
5
5
  Project-URL: Apify Homepage, https://apify.com
6
6
  Project-URL: Changelog, https://crawlee.dev/python/docs/changelog
@@ -180,14 +180,15 @@ crawlee/storage_clients/_sql/_request_queue_client.py,sha256=iavp-G62ApPtPmKePYv
180
180
  crawlee/storage_clients/_sql/_storage_client.py,sha256=3xfgUcdW7Pu_j3SDYFzAdnU81jl1CmZ9Z5_NLvNi4P8,10913
181
181
  crawlee/storage_clients/_sql/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
182
182
  crawlee/storages/__init__.py,sha256=wc2eioyCKAAYrg4N7cshpjC-UbE23OzGar9nK_kteSY,186
183
- crawlee/storages/_base.py,sha256=Ka9RdYnEYlNLiKJGWqSwEDOPv5AtJWXKZErSTesh42Y,2124
184
- crawlee/storages/_dataset.py,sha256=T2C8Q3gDUWg8p0JBS5MHnCuSd252oDU61JuNZGZzg-I,14573
185
- crawlee/storages/_key_value_store.py,sha256=3oI5hVoM_NpTQVKXCbQCmb0sZhW7vN2oXQo-Yxi7BLQ,10127
186
- crawlee/storages/_request_queue.py,sha256=jt-d-NkI9lAorLssoI2r_lZjeEipe-5Cn6z9bfQqY3k,13154
187
- crawlee/storages/_storage_instance_manager.py,sha256=iFX3ymsIXyTg8tMHtx5Wn9XyaC77dIf15GpuggsJPDM,7821
183
+ crawlee/storages/_base.py,sha256=zUOcMJTg8MAzq-m9X1NJcWncCfxzI5mb5MyY35WAkMk,2310
184
+ crawlee/storages/_dataset.py,sha256=l3VJCaJnaAEhJFpfRUOLzIbW332R8gdEPSSGhLq65pg,14652
185
+ crawlee/storages/_key_value_store.py,sha256=ik--ZPCzOiG5hmm6k5LNH_FO9P3MoW0UvTKiGVf1RIY,10206
186
+ crawlee/storages/_request_queue.py,sha256=bjBOGbpMaGUsqJPVB-JD2VShziPAYMI-GvWKKpylzDE,13233
187
+ crawlee/storages/_storage_instance_manager.py,sha256=72n0YlPwNpSQDJSPf4TxnI2GvIK6L-ZiTmHRbFcoVU0,8164
188
+ crawlee/storages/_utils.py,sha256=Yz-5tEBYKYCFJemYT29--uGJqoJLApLDLgPcsnbifRw,439
188
189
  crawlee/storages/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
189
- crawlee-1.0.2b2.dist-info/METADATA,sha256=t0naR_tcjn5u_u1v5ksG0sa8WzOwOZ0HryOIJ-6tKvE,29314
190
- crawlee-1.0.2b2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
191
- crawlee-1.0.2b2.dist-info/entry_points.txt,sha256=1p65X3dA-cYvzjtlxLL6Kn1wpY-3uEDVqJLp53uNPeo,45
192
- crawlee-1.0.2b2.dist-info/licenses/LICENSE,sha256=AsFjHssKjj4LGd2ZCqXn6FBzMqcWdjQre1byPPSypVw,11355
193
- crawlee-1.0.2b2.dist-info/RECORD,,
190
+ crawlee-1.0.2b4.dist-info/METADATA,sha256=YlX57uGdGcahWhpTusPjBchIq9y47R9WnXHSka5T0PU,29314
191
+ crawlee-1.0.2b4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
192
+ crawlee-1.0.2b4.dist-info/entry_points.txt,sha256=1p65X3dA-cYvzjtlxLL6Kn1wpY-3uEDVqJLp53uNPeo,45
193
+ crawlee-1.0.2b4.dist-info/licenses/LICENSE,sha256=AsFjHssKjj4LGd2ZCqXn6FBzMqcWdjQre1byPPSypVw,11355
194
+ crawlee-1.0.2b4.dist-info/RECORD,,