crawlee 1.0.2b2__py3-none-any.whl → 1.0.2b4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of crawlee might be problematic. Click here for more details.
- crawlee/storages/_base.py +3 -1
- crawlee/storages/_dataset.py +3 -0
- crawlee/storages/_key_value_store.py +3 -0
- crawlee/storages/_request_queue.py +3 -0
- crawlee/storages/_storage_instance_manager.py +9 -1
- crawlee/storages/_utils.py +11 -0
- {crawlee-1.0.2b2.dist-info → crawlee-1.0.2b4.dist-info}/METADATA +1 -1
- {crawlee-1.0.2b2.dist-info → crawlee-1.0.2b4.dist-info}/RECORD +11 -10
- {crawlee-1.0.2b2.dist-info → crawlee-1.0.2b4.dist-info}/WHEEL +0 -0
- {crawlee-1.0.2b2.dist-info → crawlee-1.0.2b4.dist-info}/entry_points.txt +0 -0
- {crawlee-1.0.2b2.dist-info → crawlee-1.0.2b4.dist-info}/licenses/LICENSE +0 -0
crawlee/storages/_base.py
CHANGED
|
@@ -44,7 +44,9 @@ class Storage(ABC):
|
|
|
44
44
|
|
|
45
45
|
Args:
|
|
46
46
|
id: The storage ID.
|
|
47
|
-
name: The storage name (global scope, persists across runs).
|
|
47
|
+
name: The storage name (global scope, persists across runs). Name can only contain letters "a" through "z",
|
|
48
|
+
the digits "0" through "9", and the hyphen ("-") but only in the middle of the string
|
|
49
|
+
(e.g. "my-value-1").
|
|
48
50
|
alias: The storage alias (run scope, creates unnamed storage).
|
|
49
51
|
configuration: Configuration object used during the storage creation or restoration process.
|
|
50
52
|
storage_client: Underlying storage client to use. If not provided, the default global storage client
|
crawlee/storages/_dataset.py
CHANGED
|
@@ -12,6 +12,7 @@ from crawlee._utils.file import export_csv_to_stream, export_json_to_stream
|
|
|
12
12
|
|
|
13
13
|
from ._base import Storage
|
|
14
14
|
from ._key_value_store import KeyValueStore
|
|
15
|
+
from ._utils import validate_storage_name
|
|
15
16
|
|
|
16
17
|
if TYPE_CHECKING:
|
|
17
18
|
from collections.abc import AsyncIterator
|
|
@@ -75,6 +76,8 @@ class Dataset(Storage):
|
|
|
75
76
|
id: The unique identifier of the storage.
|
|
76
77
|
name: The name of the storage, if available.
|
|
77
78
|
"""
|
|
79
|
+
validate_storage_name(name)
|
|
80
|
+
|
|
78
81
|
self._client = client
|
|
79
82
|
self._id = id
|
|
80
83
|
self._name = name
|
|
@@ -15,6 +15,7 @@ from crawlee._utils.recoverable_state import RecoverableState
|
|
|
15
15
|
from crawlee.storage_clients.models import KeyValueStoreMetadata
|
|
16
16
|
|
|
17
17
|
from ._base import Storage
|
|
18
|
+
from ._utils import validate_storage_name
|
|
18
19
|
|
|
19
20
|
if TYPE_CHECKING:
|
|
20
21
|
from collections.abc import AsyncIterator
|
|
@@ -84,6 +85,8 @@ class KeyValueStore(Storage):
|
|
|
84
85
|
id: The unique identifier of the storage.
|
|
85
86
|
name: The name of the storage, if available.
|
|
86
87
|
"""
|
|
88
|
+
validate_storage_name(name)
|
|
89
|
+
|
|
87
90
|
self._client = client
|
|
88
91
|
self._id = id
|
|
89
92
|
self._name = name
|
|
@@ -13,6 +13,7 @@ from crawlee._utils.wait import wait_for_all_tasks_for_finish
|
|
|
13
13
|
from crawlee.request_loaders import RequestManager
|
|
14
14
|
|
|
15
15
|
from ._base import Storage
|
|
16
|
+
from ._utils import validate_storage_name
|
|
16
17
|
|
|
17
18
|
if TYPE_CHECKING:
|
|
18
19
|
from collections.abc import Sequence
|
|
@@ -80,6 +81,8 @@ class RequestQueue(Storage, RequestManager):
|
|
|
80
81
|
id: The unique identifier of the storage.
|
|
81
82
|
name: The name of the storage, if available.
|
|
82
83
|
"""
|
|
84
|
+
validate_storage_name(name)
|
|
85
|
+
|
|
83
86
|
self._client = client
|
|
84
87
|
self._id = id
|
|
85
88
|
self._name = name
|
|
@@ -8,6 +8,8 @@ from typing import TYPE_CHECKING, TypeVar
|
|
|
8
8
|
from crawlee._utils.raise_if_too_many_kwargs import raise_if_too_many_kwargs
|
|
9
9
|
from crawlee.storage_clients._base import DatasetClient, KeyValueStoreClient, RequestQueueClient
|
|
10
10
|
|
|
11
|
+
from ._utils import validate_storage_name
|
|
12
|
+
|
|
11
13
|
if TYPE_CHECKING:
|
|
12
14
|
from ._base import Storage
|
|
13
15
|
|
|
@@ -90,7 +92,9 @@ class StorageInstanceManager:
|
|
|
90
92
|
Args:
|
|
91
93
|
cls: The storage class to instantiate.
|
|
92
94
|
id: Storage ID.
|
|
93
|
-
name: Storage name. (global scope, persists across runs).
|
|
95
|
+
name: Storage name (global scope, persists across runs). Name can only contain letters "a" through "z",
|
|
96
|
+
the digits "0" through "9", and the hyphen ("-") but only in the middle of the string
|
|
97
|
+
(e.g. "my-value-1").
|
|
94
98
|
alias: Storage alias (run scope, creates unnamed storage).
|
|
95
99
|
client_opener_coro: Coroutine to open the storage client when storage instance not found in cache.
|
|
96
100
|
storage_client_cache_key: Additional optional key from storage client to differentiate cache entries.
|
|
@@ -146,6 +150,10 @@ class StorageInstanceManager:
|
|
|
146
150
|
f'Use a different name or drop the existing alias storage first.'
|
|
147
151
|
)
|
|
148
152
|
|
|
153
|
+
# Validate storage name
|
|
154
|
+
if name is not None:
|
|
155
|
+
validate_storage_name(name)
|
|
156
|
+
|
|
149
157
|
# Create new instance
|
|
150
158
|
client: KeyValueStoreClient | DatasetClient | RequestQueueClient
|
|
151
159
|
client = await client_opener_coro
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import re
|
|
2
|
+
|
|
3
|
+
# Valid storage name: either a single alphanumeric character, or a string that starts and ends with an
# alphanumeric character and contains only alphanumerics and hyphens in between. Both lowercase and
# uppercase ASCII letters are accepted by the pattern.
NAME_REGEX = re.compile(r'^([a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9-]*[a-zA-Z0-9])$')


def validate_storage_name(name: str | None) -> None:
    """Check that a storage name is well-formed.

    Falsy values (`None` or an empty string) are treated as "no name given" and are accepted without
    any check.

    Args:
        name: The storage name to validate, or `None`.

    Raises:
        ValueError: If `name` is non-empty and does not match `NAME_REGEX`.
    """
    # Use `fullmatch` instead of `match`: with `match`, the `$` anchor also succeeds just before a trailing
    # newline, so a name such as "abc\n" would slip through the validation.
    if name and not NAME_REGEX.fullmatch(name):
        raise ValueError(
            f'Invalid storage name "{name}". Name can only contain letters "a" through "z", the digits "0" through '
            '"9", and the hyphen ("-") but only in the middle of the string (e.g. "my-value-1")'
        )
|
|
@@ -180,14 +180,15 @@ crawlee/storage_clients/_sql/_request_queue_client.py,sha256=iavp-G62ApPtPmKePYv
|
|
|
180
180
|
crawlee/storage_clients/_sql/_storage_client.py,sha256=3xfgUcdW7Pu_j3SDYFzAdnU81jl1CmZ9Z5_NLvNi4P8,10913
|
|
181
181
|
crawlee/storage_clients/_sql/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
182
182
|
crawlee/storages/__init__.py,sha256=wc2eioyCKAAYrg4N7cshpjC-UbE23OzGar9nK_kteSY,186
|
|
183
|
-
crawlee/storages/_base.py,sha256=
|
|
184
|
-
crawlee/storages/_dataset.py,sha256=
|
|
185
|
-
crawlee/storages/_key_value_store.py,sha256=
|
|
186
|
-
crawlee/storages/_request_queue.py,sha256=
|
|
187
|
-
crawlee/storages/_storage_instance_manager.py,sha256=
|
|
183
|
+
crawlee/storages/_base.py,sha256=zUOcMJTg8MAzq-m9X1NJcWncCfxzI5mb5MyY35WAkMk,2310
|
|
184
|
+
crawlee/storages/_dataset.py,sha256=l3VJCaJnaAEhJFpfRUOLzIbW332R8gdEPSSGhLq65pg,14652
|
|
185
|
+
crawlee/storages/_key_value_store.py,sha256=ik--ZPCzOiG5hmm6k5LNH_FO9P3MoW0UvTKiGVf1RIY,10206
|
|
186
|
+
crawlee/storages/_request_queue.py,sha256=bjBOGbpMaGUsqJPVB-JD2VShziPAYMI-GvWKKpylzDE,13233
|
|
187
|
+
crawlee/storages/_storage_instance_manager.py,sha256=72n0YlPwNpSQDJSPf4TxnI2GvIK6L-ZiTmHRbFcoVU0,8164
|
|
188
|
+
crawlee/storages/_utils.py,sha256=Yz-5tEBYKYCFJemYT29--uGJqoJLApLDLgPcsnbifRw,439
|
|
188
189
|
crawlee/storages/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
189
|
-
crawlee-1.0.
|
|
190
|
-
crawlee-1.0.
|
|
191
|
-
crawlee-1.0.
|
|
192
|
-
crawlee-1.0.
|
|
193
|
-
crawlee-1.0.
|
|
190
|
+
crawlee-1.0.2b4.dist-info/METADATA,sha256=YlX57uGdGcahWhpTusPjBchIq9y47R9WnXHSka5T0PU,29314
|
|
191
|
+
crawlee-1.0.2b4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
192
|
+
crawlee-1.0.2b4.dist-info/entry_points.txt,sha256=1p65X3dA-cYvzjtlxLL6Kn1wpY-3uEDVqJLp53uNPeo,45
|
|
193
|
+
crawlee-1.0.2b4.dist-info/licenses/LICENSE,sha256=AsFjHssKjj4LGd2ZCqXn6FBzMqcWdjQre1byPPSypVw,11355
|
|
194
|
+
crawlee-1.0.2b4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|