datachain 0.21.1__py3-none-any.whl → 0.23.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- datachain/__init__.py +2 -0
- datachain/cache.py +2 -2
- datachain/catalog/catalog.py +213 -65
- datachain/cli/__init__.py +0 -7
- datachain/cli/commands/datasets.py +35 -26
- datachain/cli/commands/ls.py +2 -2
- datachain/cli/parser/__init__.py +1 -35
- datachain/client/fsspec.py +5 -3
- datachain/client/hf.py +10 -0
- datachain/client/local.py +4 -4
- datachain/data_storage/metastore.py +433 -37
- datachain/data_storage/sqlite.py +140 -7
- datachain/data_storage/warehouse.py +26 -7
- datachain/dataset.py +128 -12
- datachain/delta.py +11 -7
- datachain/error.py +36 -0
- datachain/func/func.py +1 -1
- datachain/lib/arrow.py +3 -3
- datachain/lib/dataset_info.py +4 -0
- datachain/lib/dc/datachain.py +253 -91
- datachain/lib/dc/datasets.py +103 -50
- datachain/lib/dc/listings.py +3 -3
- datachain/lib/dc/records.py +2 -1
- datachain/lib/dc/storage.py +38 -40
- datachain/lib/file.py +77 -23
- datachain/lib/listing.py +3 -1
- datachain/lib/meta_formats.py +1 -1
- datachain/lib/namespaces.py +71 -0
- datachain/lib/projects.py +86 -0
- datachain/lib/pytorch.py +1 -1
- datachain/lib/settings.py +10 -0
- datachain/lib/signal_schema.py +8 -0
- datachain/lib/tar.py +1 -2
- datachain/lib/udf.py +1 -1
- datachain/lib/udf_signature.py +1 -1
- datachain/lib/webdataset.py +30 -20
- datachain/listing.py +3 -1
- datachain/namespace.py +65 -0
- datachain/project.py +78 -0
- datachain/query/dataset.py +71 -46
- datachain/query/session.py +1 -1
- datachain/remote/studio.py +61 -26
- datachain/studio.py +23 -6
- {datachain-0.21.1.dist-info → datachain-0.23.0.dist-info}/METADATA +2 -2
- {datachain-0.21.1.dist-info → datachain-0.23.0.dist-info}/RECORD +49 -45
- {datachain-0.21.1.dist-info → datachain-0.23.0.dist-info}/WHEEL +0 -0
- {datachain-0.21.1.dist-info → datachain-0.23.0.dist-info}/entry_points.txt +0 -0
- {datachain-0.21.1.dist-info → datachain-0.23.0.dist-info}/licenses/LICENSE +0 -0
- {datachain-0.21.1.dist-info → datachain-0.23.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
|
|
3
|
+
from datachain.error import NamespaceCreateNotAllowedError
|
|
4
|
+
from datachain.namespace import Namespace
|
|
5
|
+
from datachain.query import Session
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def create(
    name: str, descr: Optional[str] = None, session: Optional[Session] = None
) -> Namespace:
    """
    Create a new namespace and return it.

    A namespace is the top-level grouping for projects (which in turn group
    datasets). The default namespace always exists and is assumed when none
    is given. Additional namespaces can be created in Studio only; the CLI
    works with the default one.

    Parameters:
        name: Name of the new namespace.
        descr: Optional description of the namespace.
        session: Optional session to use for the operation.

    Example:
        ```py
        from datachain.lib.namespaces import create as create_namespace
        namespace = create_namespace("dev", "Dev namespace")
        ```
    """
    active_session = Session.get(session)
    metastore = active_session.catalog.metastore

    # Guard: some metastore backends do not permit namespace creation at all.
    if not metastore.namespace_allowed_to_create:
        raise NamespaceCreateNotAllowedError("Creating namespace is not allowed")

    # Raises InvalidNamespaceNameError for empty or reserved names.
    Namespace.validate_name(name)

    return metastore.create_namespace(name, descr)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def get(name: str, session: Optional[Session] = None) -> Namespace:
    """
    Look up a namespace by its name.

    Raises `NamespaceNotFoundError` when no namespace with that name exists.

    Parameters:
        name : The name of the namespace.
        session : Session to use for getting namespace.

    Example:
        ```py
        import datachain as dc
        namespace = dc.get_namespace("local")
        ```
    """
    metastore = Session.get(session).catalog.metastore
    return metastore.get_namespace(name)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def ls(session: Optional[Session] = None) -> list[Namespace]:
    """
    Return every namespace known to the metastore.

    Parameters:
        session : Session to use for getting namespaces.

    Example:
        ```py
        from datachain.lib.namespaces import ls as ls_namespaces
        namespaces = ls_namespaces()
        ```
    """
    active_session = Session.get(session)
    return active_session.catalog.metastore.list_namespaces()
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
|
|
3
|
+
from datachain.error import ProjectCreateNotAllowedError
|
|
4
|
+
from datachain.project import Project
|
|
5
|
+
from datachain.query import Session
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def create(
    namespace: str,
    name: str,
    descr: Optional[str] = None,
    session: Optional[Session] = None,
) -> Project:
    """
    Create a new project inside the given namespace and return it.

    Projects group datasets. A default project always exists; additional
    ones can be created in Studio only, not via the CLI.

    Parameters:
        name: Name of the new project.
        namespace: Namespace to create the project in. Created if it doesn't exist.
        descr: Optional description of the project.
        session: Optional session to use for the operation.

    Example:
        ```py
        import datachain as dc
        project = dc.create_project("dev", "my-project", "My personal project")
        ```
    """
    active_session = Session.get(session)
    metastore = active_session.catalog.metastore

    # Guard: some metastore backends do not permit project creation at all.
    if not metastore.project_allowed_to_create:
        raise ProjectCreateNotAllowedError("Creating project is not allowed")

    # Raises InvalidProjectNameError for empty or reserved names.
    Project.validate_name(name)

    return metastore.create_project(namespace, name, descr)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def get(name: str, namespace: str, session: Optional[Session] = None) -> Project:
    """
    Gets a project by name in some namespace.
    If the project is not found, a `ProjectNotFoundError` is raised.

    Parameters:
        name : The name of the project.
        namespace : The name of the namespace.
        session : Optional session to use for getting project.

    Example:
        ```py
        import datachain as dc
        from datachain.lib.projects import get as get_project
        project = get_project("my-project", "local")
        ```
    """
    # Fix: `session` now defaults to None, consistent with the sibling
    # create()/ls() helpers — previously the two-argument call shown in the
    # docstring example raised TypeError (missing required `session`).
    return Session.get(session).catalog.metastore.get_project(name, namespace)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def ls(
    namespace: Optional[str] = None, session: Optional[Session] = None
) -> list[Project]:
    """
    List projects, either within one namespace or across all of them.

    Parameters:
        namespace : An optional namespace name; when omitted, projects from
            every namespace are returned.
        session : Session to use for getting project.

    Example:
        ```py
        import datachain as dc
        from datachain.lib.projects import ls as ls_projects
        local_namespace_projects = ls_projects("local")
        all_projects = ls_projects()
        ```
    """
    metastore = Session.get(session).catalog.metastore
    # Resolve the namespace name to its id; None means "all namespaces".
    ns_id = metastore.get_namespace(namespace).id if namespace else None
    return metastore.list_projects(ns_id)
|
datachain/lib/pytorch.py
CHANGED
|
@@ -130,7 +130,7 @@ class PytorchDataset(IterableDataset):
|
|
|
130
130
|
if self.num_samples > 0:
|
|
131
131
|
ds = ds.sample(self.num_samples)
|
|
132
132
|
ds = ds.chunk(total_rank, total_workers)
|
|
133
|
-
yield from ds.
|
|
133
|
+
yield from ds.to_iter()
|
|
134
134
|
|
|
135
135
|
def _iter_with_prefetch(self) -> Generator[tuple[Any], None, None]:
|
|
136
136
|
from datachain.lib.udf import _prefetch_inputs
|
datachain/lib/settings.py
CHANGED
|
@@ -14,12 +14,16 @@ class Settings:
|
|
|
14
14
|
workers=None,
|
|
15
15
|
min_task_size=None,
|
|
16
16
|
prefetch=None,
|
|
17
|
+
namespace=None,
|
|
18
|
+
project=None,
|
|
17
19
|
):
|
|
18
20
|
self._cache = cache
|
|
19
21
|
self.parallel = parallel
|
|
20
22
|
self._workers = workers
|
|
21
23
|
self.min_task_size = min_task_size
|
|
22
24
|
self.prefetch = prefetch
|
|
25
|
+
self.namespace = namespace
|
|
26
|
+
self.project = project
|
|
23
27
|
|
|
24
28
|
if not isinstance(cache, bool) and cache is not None:
|
|
25
29
|
raise SettingsError(
|
|
@@ -67,6 +71,10 @@ class Settings:
|
|
|
67
71
|
res["workers"] = self.workers
|
|
68
72
|
if self.min_task_size is not None:
|
|
69
73
|
res["min_task_size"] = self.min_task_size
|
|
74
|
+
if self.namespace is not None:
|
|
75
|
+
res["namespace"] = self.namespace
|
|
76
|
+
if self.project is not None:
|
|
77
|
+
res["project"] = self.project
|
|
70
78
|
return res
|
|
71
79
|
|
|
72
80
|
def add(self, settings: "Settings"):
|
|
@@ -74,5 +82,7 @@ class Settings:
|
|
|
74
82
|
self.parallel = settings.parallel or self.parallel
|
|
75
83
|
self._workers = settings._workers or self._workers
|
|
76
84
|
self.min_task_size = settings.min_task_size or self.min_task_size
|
|
85
|
+
self.namespace = settings.namespace or self.namespace
|
|
86
|
+
self.project = settings.project or self.project
|
|
77
87
|
if settings.prefetch is not None:
|
|
78
88
|
self.prefetch = settings.prefetch
|
datachain/lib/signal_schema.py
CHANGED
|
@@ -25,6 +25,7 @@ from pydantic import BaseModel, Field, create_model
|
|
|
25
25
|
from sqlalchemy import ColumnElement
|
|
26
26
|
from typing_extensions import Literal as LiteralEx
|
|
27
27
|
|
|
28
|
+
from datachain.func import literal
|
|
28
29
|
from datachain.func.func import Func
|
|
29
30
|
from datachain.lib.convert.python_to_sql import python_to_sql
|
|
30
31
|
from datachain.lib.convert.sql_to_python import sql_to_python
|
|
@@ -659,6 +660,7 @@ class SignalSchema:
|
|
|
659
660
|
|
|
660
661
|
def mutate(self, args_map: dict) -> "SignalSchema":
|
|
661
662
|
new_values = self.values.copy()
|
|
663
|
+
primitives = (bool, str, int, float)
|
|
662
664
|
|
|
663
665
|
for name, value in args_map.items():
|
|
664
666
|
if isinstance(value, Column) and value.name in self.values:
|
|
@@ -679,6 +681,12 @@ class SignalSchema:
|
|
|
679
681
|
# adding new signal with function
|
|
680
682
|
new_values[name] = value.get_result_type(self)
|
|
681
683
|
continue
|
|
684
|
+
if isinstance(value, primitives):
|
|
685
|
+
# For primitives, store the type, not the value
|
|
686
|
+
val = literal(value)
|
|
687
|
+
val.type = python_to_sql(type(value))()
|
|
688
|
+
new_values[name] = sql_to_python(val)
|
|
689
|
+
continue
|
|
682
690
|
if isinstance(value, ColumnElement):
|
|
683
691
|
# adding new signal
|
|
684
692
|
new_values[name] = sql_to_python(value)
|
datachain/lib/tar.py
CHANGED
|
@@ -6,12 +6,11 @@ from datachain.lib.file import File, TarVFile
|
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
def build_tar_member(parent: File, info: tarfile.TarInfo) -> File:
|
|
9
|
-
new_parent = parent.get_full_name()
|
|
10
9
|
etag_string = "-".join([parent.etag, info.name, str(info.mtime)])
|
|
11
10
|
etag = hashlib.md5(etag_string.encode(), usedforsecurity=False).hexdigest()
|
|
12
11
|
return File(
|
|
13
12
|
source=parent.source,
|
|
14
|
-
path=f"{
|
|
13
|
+
path=f"{parent.path}/{info.name}",
|
|
15
14
|
version=parent.version,
|
|
16
15
|
size=info.size,
|
|
17
16
|
etag=etag,
|
datachain/lib/udf.py
CHANGED
|
@@ -309,7 +309,7 @@ async def _prefetch_input(
|
|
|
309
309
|
after_prefetch: "Callable[[], None]" = noop,
|
|
310
310
|
) -> T:
|
|
311
311
|
for obj in row:
|
|
312
|
-
if isinstance(obj, File) and await obj._prefetch(download_cb):
|
|
312
|
+
if isinstance(obj, File) and obj.path and await obj._prefetch(download_cb):
|
|
313
313
|
after_prefetch()
|
|
314
314
|
return row
|
|
315
315
|
|
datachain/lib/udf_signature.py
CHANGED
datachain/lib/webdataset.py
CHANGED
|
@@ -34,29 +34,29 @@ warnings.filterwarnings(
|
|
|
34
34
|
|
|
35
35
|
|
|
36
36
|
class WDSError(DataChainError):
|
|
37
|
-
def __init__(self,
|
|
38
|
-
super().__init__(f"WebDataset error '{
|
|
37
|
+
def __init__(self, tar_name: str, message: str):
|
|
38
|
+
super().__init__(f"WebDataset error '{tar_name}': {message}")
|
|
39
39
|
|
|
40
40
|
|
|
41
41
|
class CoreFileDuplicationError(WDSError):
|
|
42
|
-
def __init__(self,
|
|
42
|
+
def __init__(self, tar_name: str, file1: str, file2: str):
|
|
43
43
|
super().__init__(
|
|
44
|
-
|
|
44
|
+
tar_name, f"duplication of files with core extensions: {file1}, {file2}"
|
|
45
45
|
)
|
|
46
46
|
|
|
47
47
|
|
|
48
48
|
class CoreFileNotFoundError(WDSError):
|
|
49
|
-
def __init__(self,
|
|
49
|
+
def __init__(self, tar_name: str, extensions: Sequence[str], stem: str):
|
|
50
50
|
super().__init__(
|
|
51
|
-
|
|
51
|
+
tar_name,
|
|
52
52
|
f"no files with the extensions '{','.join(extensions)}'"
|
|
53
53
|
f" were found for file stem {stem}",
|
|
54
54
|
)
|
|
55
55
|
|
|
56
56
|
|
|
57
57
|
class UnknownFileExtensionError(WDSError):
|
|
58
|
-
def __init__(self,
|
|
59
|
-
super().__init__(
|
|
58
|
+
def __init__(self, tar_name, name: str, ext: str):
|
|
59
|
+
super().__init__(tar_name, f"unknown extension '{ext}' for file '{name}'")
|
|
60
60
|
|
|
61
61
|
|
|
62
62
|
class WDSBasic(DataModel):
|
|
@@ -113,10 +113,10 @@ class Builder:
|
|
|
113
113
|
def __init__(
|
|
114
114
|
self,
|
|
115
115
|
tar_stream: File,
|
|
116
|
-
core_extensions:
|
|
116
|
+
core_extensions: Sequence[str],
|
|
117
117
|
wds_class: type[WDSBasic],
|
|
118
|
-
tar,
|
|
119
|
-
encoding="utf-8",
|
|
118
|
+
tar: tarfile.TarFile,
|
|
119
|
+
encoding: str = "utf-8",
|
|
120
120
|
):
|
|
121
121
|
self._core_extensions = core_extensions
|
|
122
122
|
self._tar_stream = tar_stream
|
|
@@ -145,18 +145,20 @@ class Builder:
|
|
|
145
145
|
if ext in self._core_extensions:
|
|
146
146
|
if self.state.core_file is not None:
|
|
147
147
|
raise CoreFileDuplicationError(
|
|
148
|
-
self._tar_stream, file.name, self.state.core_file.name
|
|
148
|
+
self._tar_stream.name, file.name, self.state.core_file.name
|
|
149
149
|
)
|
|
150
150
|
self.state.core_file = file
|
|
151
151
|
elif ext in self.state.data:
|
|
152
152
|
raise WDSError(
|
|
153
|
-
self._tar_stream,
|
|
153
|
+
self._tar_stream.name,
|
|
154
154
|
f"file with extension '.{ext}' already exists in the archive",
|
|
155
155
|
)
|
|
156
156
|
else:
|
|
157
157
|
type_ = self._get_type(ext)
|
|
158
158
|
if type_ is None:
|
|
159
|
-
raise UnknownFileExtensionError(
|
|
159
|
+
raise UnknownFileExtensionError(
|
|
160
|
+
self._tar_stream.name, fstream.name, ext
|
|
161
|
+
)
|
|
160
162
|
|
|
161
163
|
if issubclass(type_, WDSReadableSubclass):
|
|
162
164
|
reader = type_._reader
|
|
@@ -165,7 +167,7 @@ class Builder:
|
|
|
165
167
|
|
|
166
168
|
if reader is None:
|
|
167
169
|
raise WDSError(
|
|
168
|
-
self._tar_stream,
|
|
170
|
+
self._tar_stream.name,
|
|
169
171
|
f"unable to find a reader for type {type_}, extension .{ext}",
|
|
170
172
|
)
|
|
171
173
|
self.state.data[ext] = reader(self, file)
|
|
@@ -173,7 +175,7 @@ class Builder:
|
|
|
173
175
|
def produce(self):
|
|
174
176
|
if self.state.core_file is None:
|
|
175
177
|
raise CoreFileNotFoundError(
|
|
176
|
-
self._tar_stream, self._core_extensions, self.state.stem
|
|
178
|
+
self._tar_stream.name, self._core_extensions, self.state.stem
|
|
177
179
|
)
|
|
178
180
|
|
|
179
181
|
file = build_tar_member(self._tar_stream, self.state.core_file)
|
|
@@ -194,7 +196,13 @@ class Builder:
|
|
|
194
196
|
return anno
|
|
195
197
|
|
|
196
198
|
|
|
197
|
-
def get_tar_groups(
|
|
199
|
+
def get_tar_groups(
|
|
200
|
+
stream: File,
|
|
201
|
+
tar: tarfile.TarFile,
|
|
202
|
+
core_extensions: Sequence[str],
|
|
203
|
+
spec: type[WDSBasic],
|
|
204
|
+
encoding: str = "utf-8",
|
|
205
|
+
) -> Iterator[WDSBasic]:
|
|
198
206
|
builder = Builder(stream, core_extensions, spec, tar, encoding)
|
|
199
207
|
|
|
200
208
|
for item in sorted(tar.getmembers(), key=lambda m: Path(m.name).stem):
|
|
@@ -210,9 +218,11 @@ def get_tar_groups(stream, tar, core_extensions, spec, encoding="utf-8"):
|
|
|
210
218
|
|
|
211
219
|
|
|
212
220
|
def process_webdataset(
|
|
213
|
-
core_extensions: Sequence[str] = ("jpg", "png"),
|
|
214
|
-
|
|
215
|
-
|
|
221
|
+
core_extensions: Sequence[str] = ("jpg", "png"),
|
|
222
|
+
spec: type[WDSBasic] = WDSAllFile,
|
|
223
|
+
encoding: str = "utf-8",
|
|
224
|
+
) -> Callable[[File], Iterator]:
|
|
225
|
+
def wds_func(file: File) -> Iterator[spec]: # type: ignore[valid-type]
|
|
216
226
|
with file.open() as fd:
|
|
217
227
|
with tarfile.open(fileobj=fd) as tar:
|
|
218
228
|
yield from get_tar_groups(file, tar, core_extensions, spec, encoding)
|
datachain/listing.py
CHANGED
|
@@ -66,7 +66,9 @@ class Listing:
|
|
|
66
66
|
@cached_property
|
|
67
67
|
def dataset(self) -> "DatasetRecord":
|
|
68
68
|
assert self.dataset_name
|
|
69
|
-
return self.metastore.get_dataset(
|
|
69
|
+
return self.metastore.get_dataset(
|
|
70
|
+
self.dataset_name, self.metastore.listing_project.id
|
|
71
|
+
)
|
|
70
72
|
|
|
71
73
|
@cached_property
|
|
72
74
|
def dataset_rows(self):
|
datachain/namespace.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import builtins
|
|
2
|
+
from dataclasses import dataclass, fields
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
from typing import Any, Optional, TypeVar
|
|
5
|
+
|
|
6
|
+
from datachain.error import InvalidNamespaceNameError
|
|
7
|
+
|
|
8
|
+
N = TypeVar("N", bound="Namespace")
NAMESPACE_NAME_RESERVED_CHARS = ["."]


@dataclass(frozen=True)
class Namespace:
    """Immutable record describing a namespace — the top-level grouping
    under which projects (and through them, datasets) live."""

    id: int
    uuid: str
    name: str
    descr: Optional[str]
    created_at: datetime

    @staticmethod
    def validate_name(name: str) -> None:
        """Throws exception if name is invalid, otherwise returns None"""
        if not name:
            raise InvalidNamespaceNameError("Namespace name cannot be empty")

        for reserved in NAMESPACE_NAME_RESERVED_CHARS:
            if reserved not in name:
                continue
            raise InvalidNamespaceNameError(
                f"Character {reserved} is reserved and not allowed in namespace name"
            )

        if name in (Namespace.default(), Namespace.system()):
            raise InvalidNamespaceNameError(
                f"Namespace name {name} is reserved and cannot be used."
            )

    @staticmethod
    def default() -> str:
        """Name of default namespace"""
        return "local"

    @staticmethod
    def system() -> str:
        """Name of the system namespace"""
        return "system"

    @property
    def is_system(self):
        # True only for the reserved "system" namespace.
        return self.name == Namespace.system()

    @classmethod
    def parse(
        cls: builtins.type[N],
        id: int,
        uuid: str,
        name: str,
        descr: Optional[str],
        created_at: datetime,
    ) -> "Namespace":
        """Alternate constructor from positional (e.g. DB row) values."""
        return cls(id=id, uuid=uuid, name=name, descr=descr, created_at=created_at)

    @classmethod
    def from_dict(cls, d: dict[str, Any]) -> "Namespace":
        """Build a Namespace from a mapping, ignoring unknown keys."""
        known: dict[str, Any] = {}
        for f in fields(cls):
            if f.name in d:
                known[f.name] = d[f.name]
        return cls(**known)
|
datachain/project.py
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
import builtins
|
|
2
|
+
from dataclasses import dataclass, fields
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
from typing import Any, Optional, TypeVar
|
|
5
|
+
|
|
6
|
+
from datachain.error import InvalidProjectNameError
|
|
7
|
+
from datachain.namespace import Namespace
|
|
8
|
+
|
|
9
|
+
P = TypeVar("P", bound="Project")
PROJECT_NAME_RESERVED_CHARS = ["."]


@dataclass(frozen=True)
class Project:
    """Immutable record describing a project — the grouping for datasets,
    itself owned by a `Namespace`."""

    id: int
    uuid: str
    name: str
    descr: Optional[str]
    created_at: datetime
    namespace: Namespace

    @staticmethod
    def validate_name(name: str) -> None:
        """Throws exception if name is invalid, otherwise returns None"""
        if not name:
            raise InvalidProjectNameError("Project name cannot be empty")

        for reserved in PROJECT_NAME_RESERVED_CHARS:
            if reserved not in name:
                continue
            raise InvalidProjectNameError(
                f"Character {reserved} is reserved and not allowed in project name."
            )

        if name in (Project.default(), Project.listing()):
            raise InvalidProjectNameError(
                f"Project name {name} is reserved and cannot be used."
            )

    @staticmethod
    def default() -> str:
        """Name of default project"""
        return "local"

    @staticmethod
    def listing() -> str:
        """Name of listing project where all listing datasets will be saved"""
        return "listing"

    @classmethod
    def parse(
        cls: builtins.type[P],
        namespace_id: int,
        namespace_uuid: str,
        namespace_name: str,
        namespace_descr: Optional[str],
        namespace_created_at: datetime,
        project_id: int,
        uuid: str,
        name: str,
        descr: Optional[str],
        created_at: datetime,
        project_namespace_id: int,
    ) -> "Project":
        """Alternate constructor from flattened namespace + project row values."""
        owner = Namespace.parse(
            namespace_id,
            namespace_uuid,
            namespace_name,
            namespace_descr,
            namespace_created_at,
        )
        return cls(project_id, uuid, name, descr, created_at, owner)

    @classmethod
    def from_dict(cls, d: dict[str, Any]) -> "Project":
        """Build a Project from a mapping, ignoring unknown keys.

        NOTE(review): this removes the "namespace" entry from the caller's
        dict (via pop) — confirm callers do not reuse the mapping.
        """
        owner = Namespace.from_dict(d.pop("namespace"))
        known = {f.name: d[f.name] for f in fields(cls) if f.name in d}
        return cls(**known, namespace=owner)
|