datachain 0.31.4__py3-none-any.whl → 0.32.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

datachain/__init__.py CHANGED
@@ -37,6 +37,7 @@ from datachain.lib.file import (
37
37
  VideoFrame,
38
38
  )
39
39
  from datachain.lib.model_store import ModelStore
40
+ from datachain.lib.namespaces import delete as delete_namespace
40
41
  from datachain.lib.projects import create as create_project
41
42
  from datachain.lib.udf import Aggregator, Generator, Mapper
42
43
  from datachain.lib.utils import AbstractUDF, DataChainError
@@ -74,6 +75,7 @@ __all__ = [
74
75
  "create_project",
75
76
  "datasets",
76
77
  "delete_dataset",
78
+ "delete_namespace",
77
79
  "is_chain_type",
78
80
  "is_studio",
79
81
  "listings",
@@ -22,6 +22,7 @@ from sqlalchemy import (
22
22
  UniqueConstraint,
23
23
  select,
24
24
  )
25
+ from sqlalchemy.sql import func as f
25
26
 
26
27
  from datachain.data_storage import JobQueryType, JobStatus
27
28
  from datachain.data_storage.serializer import Serializable
@@ -37,7 +38,9 @@ from datachain.dataset import (
37
38
  from datachain.error import (
38
39
  DatasetNotFoundError,
39
40
  DatasetVersionNotFoundError,
41
+ NamespaceDeleteNotAllowedError,
40
42
  NamespaceNotFoundError,
43
+ ProjectDeleteNotAllowedError,
41
44
  ProjectNotFoundError,
42
45
  TableMissingError,
43
46
  )
@@ -141,6 +144,10 @@ class AbstractMetastore(ABC, Serializable):
141
144
  def get_namespace(self, name: str, conn=None) -> Namespace:
142
145
  """Gets a single namespace by name"""
143
146
 
147
+ @abstractmethod
148
+ def remove_namespace(self, namespace_id: int, conn=None) -> None:
149
+ """Removes a single namespace by id"""
150
+
144
151
  @abstractmethod
145
152
  def list_namespaces(self, conn=None) -> list[Namespace]:
146
153
  """Gets a list of all namespaces"""
@@ -190,10 +197,30 @@ class AbstractMetastore(ABC, Serializable):
190
197
  It also creates project if not found and create flag is set to True.
191
198
  """
192
199
 
200
+ def is_default_project(self, project_name: str, namespace_name: str) -> bool:
201
+ return (
202
+ project_name == self.default_project_name
203
+ and namespace_name == self.default_namespace_name
204
+ )
205
+
206
+ def is_listing_project(self, project_name: str, namespace_name: str) -> bool:
207
+ return (
208
+ project_name == self.listing_project_name
209
+ and namespace_name == self.system_namespace_name
210
+ )
211
+
193
212
  @abstractmethod
194
213
  def get_project_by_id(self, project_id: int, conn=None) -> Project:
195
214
  """Gets a single project by id"""
196
215
 
216
+ @abstractmethod
217
+ def count_projects(self, namespace_id: Optional[int] = None) -> int:
218
+ """Counts projects in some namespace or in general."""
219
+
220
+ @abstractmethod
221
+ def remove_project(self, project_id: int, conn=None) -> None:
222
+ """Removes a single project by id"""
223
+
197
224
  @abstractmethod
198
225
  def list_projects(self, namespace_id: Optional[int], conn=None) -> list[Project]:
199
226
  """Gets list of projects in some namespace or in general (in all namespaces)"""
@@ -270,6 +297,10 @@ class AbstractMetastore(ABC, Serializable):
270
297
  ) -> Iterator[DatasetListRecord]:
271
298
  """Lists all datasets in some project or in all projects."""
272
299
 
300
+ @abstractmethod
301
+ def count_datasets(self, project_id: Optional[int] = None) -> int:
302
+ """Counts datasets in some project or in all projects."""
303
+
273
304
  @abstractmethod
274
305
  def list_datasets_by_prefix(
275
306
  self, prefix: str, project_id: Optional[int] = None
@@ -735,6 +766,18 @@ class AbstractDBMetastore(AbstractMetastore):
735
766
 
736
767
  return self.get_namespace(name)
737
768
 
769
+ def remove_namespace(self, namespace_id: int, conn=None) -> None:
770
+ num_projects = self.count_projects(namespace_id)
771
+ if num_projects > 0:
772
+ raise NamespaceDeleteNotAllowedError(
773
+ f"Namespace cannot be removed. It contains {num_projects} project(s). "
774
+ "Please remove the project(s) first."
775
+ )
776
+
777
+ n = self._namespaces
778
+ with self.db.transaction():
779
+ self.db.execute(self._namespaces_delete().where(n.c.id == namespace_id))
780
+
738
781
  def get_namespace(self, name: str, conn=None) -> Namespace:
739
782
  """Gets a single namespace by name"""
740
783
  n = self._namespaces
@@ -796,18 +839,6 @@ class AbstractDBMetastore(AbstractMetastore):
796
839
 
797
840
  return self.get_project(name, namespace.name)
798
841
 
799
- def _is_listing_project(self, project_name: str, namespace_name: str) -> bool:
800
- return (
801
- project_name == self.listing_project_name
802
- and namespace_name == self.system_namespace_name
803
- )
804
-
805
- def _is_default_project(self, project_name: str, namespace_name: str) -> bool:
806
- return (
807
- project_name == self.default_project_name
808
- and namespace_name == self.default_namespace_name
809
- )
810
-
811
842
  def get_project(
812
843
  self, name: str, namespace_name: str, create: bool = False, conn=None
813
844
  ) -> Project:
@@ -816,7 +847,7 @@ class AbstractDBMetastore(AbstractMetastore):
816
847
  p = self._projects
817
848
  validate = True
818
849
 
819
- if self._is_listing_project(name, namespace_name) or self._is_default_project(
850
+ if self.is_listing_project(name, namespace_name) or self.is_default_project(
820
851
  name, namespace_name
821
852
  ):
822
853
  # we are always creating default and listing projects if they don't exist
@@ -858,7 +889,31 @@ class AbstractDBMetastore(AbstractMetastore):
858
889
  raise ProjectNotFoundError(f"Project with id {project_id} not found.")
859
890
  return self.project_class.parse(*rows[0])
860
891
 
861
- def list_projects(self, namespace_id: Optional[int], conn=None) -> list[Project]:
892
+ def count_projects(self, namespace_id: Optional[int] = None) -> int:
893
+ p = self._projects
894
+ query = self._projects_select()
895
+ if namespace_id:
896
+ query = query.where(p.c.namespace_id == namespace_id)
897
+
898
+ query = select(f.count(1)).select_from(query.subquery())
899
+
900
+ return next(self.db.execute(query))[0]
901
+
902
+ def remove_project(self, project_id: int, conn=None) -> None:
903
+ num_datasets = self.count_datasets(project_id)
904
+ if num_datasets > 0:
905
+ raise ProjectDeleteNotAllowedError(
906
+ f"Project cannot be removed. It contains {num_datasets} dataset(s). "
907
+ "Please remove the dataset(s) first."
908
+ )
909
+
910
+ p = self._projects
911
+ with self.db.transaction():
912
+ self.db.execute(self._projects_delete().where(p.c.id == project_id))
913
+
914
+ def list_projects(
915
+ self, namespace_id: Optional[int] = None, conn=None
916
+ ) -> list[Project]:
862
917
  """
863
918
  Gets a list of projects inside some namespace, or in all namespaces
864
919
  """
@@ -1189,7 +1244,6 @@ class AbstractDBMetastore(AbstractMetastore):
1189
1244
  def list_datasets(
1190
1245
  self, project_id: Optional[int] = None
1191
1246
  ) -> Iterator["DatasetListRecord"]:
1192
- """Lists all datasets."""
1193
1247
  d = self._datasets
1194
1248
  query = self._base_list_datasets_query().order_by(
1195
1249
  self._datasets.c.name, self._datasets_versions.c.version
@@ -1198,6 +1252,16 @@ class AbstractDBMetastore(AbstractMetastore):
1198
1252
  query = query.where(d.c.project_id == project_id)
1199
1253
  yield from self._parse_dataset_list(self.db.execute(query))
1200
1254
 
1255
+ def count_datasets(self, project_id: Optional[int] = None) -> int:
1256
+ d = self._datasets
1257
+ query = self._datasets_select()
1258
+ if project_id:
1259
+ query = query.where(d.c.project_id == project_id)
1260
+
1261
+ query = select(f.count(1)).select_from(query.subquery())
1262
+
1263
+ return next(self.db.execute(query))[0]
1264
+
1201
1265
  def list_datasets_by_prefix(
1202
1266
  self, prefix: str, project_id: Optional[int] = None, conn=None
1203
1267
  ) -> Iterator["DatasetListRecord"]:
datachain/error.py CHANGED
@@ -34,6 +34,14 @@ class ProjectCreateNotAllowedError(NotAllowedError):
34
34
  pass
35
35
 
36
36
 
37
+ class ProjectDeleteNotAllowedError(NotAllowedError):
38
+ pass
39
+
40
+
41
+ class NamespaceDeleteNotAllowedError(NotAllowedError):
42
+ pass
43
+
44
+
37
45
  class ProjectNotFoundError(NotFoundError):
38
46
  pass
39
47
 
@@ -1,7 +1,11 @@
1
1
  from typing import Optional
2
2
 
3
- from datachain.error import NamespaceCreateNotAllowedError
4
- from datachain.namespace import Namespace
3
+ from datachain.error import (
4
+ NamespaceCreateNotAllowedError,
5
+ NamespaceDeleteNotAllowedError,
6
+ )
7
+ from datachain.lib.projects import delete as delete_project
8
+ from datachain.namespace import Namespace, parse_name
5
9
  from datachain.query import Session
6
10
 
7
11
 
@@ -71,3 +75,54 @@ def ls(session: Optional[Session] = None) -> list[Namespace]:
71
75
  ```
72
76
  """
73
77
  return Session.get(session).catalog.metastore.list_namespaces()
78
+
79
+
80
+ def delete(name: str, session: Optional[Session]) -> None:
81
+ """
82
+ Removes a namespace by name.
83
+
84
+ Raises:
85
+ NamespaceNotFoundError: If the namespace does not exist.
86
+ NamespaceDeleteNotAllowedError: If the namespace is non-empty,
87
+ is the default namespace, or is a system namespace,
88
+ as these cannot be removed.
89
+
90
+ Parameters:
91
+ name : The name of the namespace.
92
+ session : Session to use for getting the namespace.
93
+
94
+ Example:
95
+ ```py
96
+ import datachain as dc
97
+ from datachain.lib.namespaces import delete as delete_namespace
98
+ delete_namespace("dev")
99
+ ```
100
+ """
101
+ session = Session.get(session)
102
+ metastore = session.catalog.metastore
103
+
104
+ namespace_name, project_name = parse_name(name)
105
+
106
+ if project_name:
107
+ return delete_project(project_name, namespace_name, session)
108
+
109
+ namespace = metastore.get_namespace(name)
110
+
111
+ if name == metastore.system_namespace_name:
112
+ raise NamespaceDeleteNotAllowedError(
113
+ f"Namespace {metastore.system_namespace_name} cannot be removed"
114
+ )
115
+
116
+ if name == metastore.default_namespace_name:
117
+ raise NamespaceDeleteNotAllowedError(
118
+ f"Namespace {metastore.default_namespace_name} cannot be removed"
119
+ )
120
+
121
+ num_projects = metastore.count_projects(namespace.id)
122
+ if num_projects > 0:
123
+ raise NamespaceDeleteNotAllowedError(
124
+ f"Namespace cannot be removed. It contains {num_projects} project(s). "
125
+ "Please remove the project(s) first."
126
+ )
127
+
128
+ metastore.remove_namespace(namespace.id)
datachain/lib/projects.py CHANGED
@@ -1,6 +1,6 @@
1
1
  from typing import Optional
2
2
 
3
- from datachain.error import ProjectCreateNotAllowedError
3
+ from datachain.error import ProjectCreateNotAllowedError, ProjectDeleteNotAllowedError
4
4
  from datachain.project import Project
5
5
  from datachain.query import Session
6
6
 
@@ -86,3 +86,49 @@ def ls(
86
86
  namespace_id = session.catalog.metastore.get_namespace(namespace).id
87
87
 
88
88
  return session.catalog.metastore.list_projects(namespace_id)
89
+
90
+
91
+ def delete(name: str, namespace: str, session: Optional[Session] = None) -> None:
92
+ """
93
+ Removes a project by name within a namespace.
94
+
95
+ Raises:
96
+ ProjectNotFoundError: If the project does not exist.
97
+ ProjectDeleteNotAllowedError: If the project is non-empty,
98
+ is the default project, or is a listing project,
99
+ as these cannot be removed.
100
+
101
+ Parameters:
102
+ name : The name of the project.
103
+ namespace : The name of the namespace.
104
+ session : Session to use for getting project.
105
+
106
+ Example:
107
+ ```py
108
+ import datachain as dc
109
+ dc.delete_project("my-project", "local")
110
+ ```
111
+ """
112
+ session = Session.get(session)
113
+ metastore = session.catalog.metastore
114
+
115
+ project = metastore.get_project(name, namespace)
116
+
117
+ if metastore.is_listing_project(name, namespace):
118
+ raise ProjectDeleteNotAllowedError(
119
+ f"Project {metastore.listing_project_name} cannot be removed"
120
+ )
121
+
122
+ if metastore.is_default_project(name, namespace):
123
+ raise ProjectDeleteNotAllowedError(
124
+ f"Project {metastore.default_project_name} cannot be removed"
125
+ )
126
+
127
+ num_datasets = metastore.count_datasets(project.id)
128
+ if num_datasets > 0:
129
+ raise ProjectDeleteNotAllowedError(
130
+ f"Project cannot be removed. It contains {num_datasets} dataset(s). "
131
+ "Please remove the dataset(s) first."
132
+ )
133
+
134
+ metastore.remove_project(project.id)
datachain/namespace.py CHANGED
@@ -9,6 +9,25 @@ N = TypeVar("N", bound="Namespace")
9
9
  NAMESPACE_NAME_RESERVED_CHARS = [".", "@"]
10
10
 
11
11
 
12
+ def parse_name(name: str) -> tuple[str, Optional[str]]:
13
+ """
14
+ Parses namespace name into namespace and optional project name.
15
+ If both namespace and project are defined in name, they need to be split by dot
16
+ e.g. dev.my-project
17
+ Valid inputs:
18
+ - dev.my-project
19
+ - dev
20
+ """
21
+ parts = name.split(".")
22
+ if len(parts) == 1:
23
+ return name, None
24
+ if len(parts) == 2:
25
+ return parts[0], parts[1]
26
+ raise InvalidNamespaceNameError(
27
+ f"Invalid namespace format: {name}. Expected 'namespace' or 'ns1.ns2'."
28
+ )
29
+
30
+
12
31
  @dataclass(frozen=True)
13
32
  class Namespace:
14
33
  id: int
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.31.4
3
+ Version: 0.32.0
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -1,14 +1,14 @@
1
- datachain/__init__.py,sha256=Ze-u6SSNsTFBRFw0lVPCdoP0kt8ybKxJIhO8jfC22Cw,1744
1
+ datachain/__init__.py,sha256=5DFB1P58c35C_WBMrhmaynsP1WwCukC-9gTJIaPy0E8,1832
2
2
  datachain/__main__.py,sha256=hG3Y4ARGEqe1AWwNMd259rBlqtphx1Wk39YbueQ0yV8,91
3
3
  datachain/asyn.py,sha256=RH_jFwJcTXxhEFomaI9yL6S3Onau6NZ6FSKfKFGtrJE,9689
4
4
  datachain/cache.py,sha256=ESVRaCJXEThMIfGEFVHx6wJPOZA7FYk9V6WxjyuqUBY,3626
5
5
  datachain/config.py,sha256=g8qbNV0vW2VEKpX-dGZ9pAn0DAz6G2ZFcr7SAV3PoSM,4272
6
6
  datachain/dataset.py,sha256=ATGa-CBTFoZeTN2V40-zHEzfMBcdYK0WuoJ6H2yEAvo,25268
7
7
  datachain/delta.py,sha256=X5Lw6GQ8MAYNl2YIExNvl0tPIkylQEWwnCw0We7NtHM,10693
8
- datachain/error.py,sha256=OWwWMkzZYJrkcoEDGhJHMf7SfKvxcsOLRF94mjPf29I,1609
8
+ datachain/error.py,sha256=comKx1JCdjsBpxabrOWaiRP0aHBspBDZl1mkKFnBSq0,1739
9
9
  datachain/job.py,sha256=x5PB6d5sqx00hePNNkirESlOVAvnmkEM5ygUgQmAhsk,1262
10
10
  datachain/listing.py,sha256=aqayl5St3D9PwdwM6nR1STkpLSw-S3U8pudO9PWi3N8,7241
11
- datachain/namespace.py,sha256=MozcXYxedIbamzY56YKy9r9fgSpOm2VryhWfIf6stYk,1791
11
+ datachain/namespace.py,sha256=sgIF90KEaC_VlMFivDIJiFz8RUsTftMxW4kOUTyxo3A,2356
12
12
  datachain/node.py,sha256=KWDT0ClYXB7FYI-QOvzAa-UDkLJErUI2eWm5FBteYuU,5577
13
13
  datachain/nodes_fetcher.py,sha256=_wgaKyqEjkqdwJ_Hj6D8vUYz7hnU7g6xhm0H6ZnYxmE,1095
14
14
  datachain/nodes_thread_pool.py,sha256=mdo0s-VybuSZkRUARcUO4Tjh8KFfZr9foHqmupx2SmM,3989
@@ -49,7 +49,7 @@ datachain/client/s3.py,sha256=6DNVGLg-woPS1DVlYVX2rIlunNblsuxyOnI1rSzhW3k,7515
49
49
  datachain/data_storage/__init__.py,sha256=9Wit-oe5P46V7CJQTD0BJ5MhOa2Y9h3ddJ4VWTe-Lec,273
50
50
  datachain/data_storage/db_engine.py,sha256=n8ojCbvVMPY2e3SG8fUaaD0b9GkVfpl_Naa_6EiHfWg,3788
51
51
  datachain/data_storage/job.py,sha256=ZkeXCNUj_VCkoKYx29hqB4AcfVUielnRjY-GYUcUxt4,426
52
- datachain/data_storage/metastore.py,sha256=aSeTRh43hmrOhULi9YD2VlgCj8B4bjE3jqCOvnb_HQs,53851
52
+ datachain/data_storage/metastore.py,sha256=2-FNdrhV-UoE6ztzdVea2MLpjr80Mvf3M3kbDfd3jSs,56222
53
53
  datachain/data_storage/schema.py,sha256=o3JbURKXRg3IJyIVA4QjHHkn6byRuz7avbydU2FlvNY,9897
54
54
  datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2krunWUum5o,927
55
55
  datachain/data_storage/sqlite.py,sha256=1fIeIhmB3O8oQVzP8dDKap0KUIgI0n2TdBQSyv0R8J4,30345
@@ -82,8 +82,8 @@ datachain/lib/listing.py,sha256=U-2stsTEwEsq4Y80dqGfktGzkmB5-ZntnL1_rzXlH0k,7089
82
82
  datachain/lib/listing_info.py,sha256=9ua40Hw0aiQByUw3oAEeNzMavJYfW0Uhe8YdCTK-m_g,1110
83
83
  datachain/lib/meta_formats.py,sha256=zdyg6XLk3QIsSk3I7s0Ez5kaCJSlE3uq7JiGxf7UwtU,6348
84
84
  datachain/lib/model_store.py,sha256=A0pSVQ7uaZ9RvANapzirF8Cqq9N6ysosPpMSkzdRPkU,3226
85
- datachain/lib/namespaces.py,sha256=I6gLC4ZzgyatFtHL85MWR4ml7-yuQOzxHE7IQNbt_ac,2107
86
- datachain/lib/projects.py,sha256=VJgmzHzKjmNPZD1tm0a1RNHmUQwn6WLWCLpKyc4UrSk,2605
85
+ datachain/lib/namespaces.py,sha256=ij67QHnRJhC8uquR21aD8u1Um2jfxnBX8PecuOQZpYw,3828
86
+ datachain/lib/projects.py,sha256=_YeU9PPcH_pC8-sbX-47XtWSdl1ltVKnALY8azWLJkM,4112
87
87
  datachain/lib/pytorch.py,sha256=S-st2SAczYut13KMf6eSqP_OQ8otWI5TRmzhK5fN3k0,7828
88
88
  datachain/lib/settings.py,sha256=xBQEPZfgaYKhHIFLd0u5CBTYDcJS8ZHCm47x7GJErFU,7666
89
89
  datachain/lib/signal_schema.py,sha256=YMMcc9gHIzBz88zfsreGa1nOoO_56HBtZlT6jf3V1WE,39224
@@ -161,9 +161,9 @@ datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR
161
161
  datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
162
162
  datachain/toolkit/split.py,sha256=ktGWzY4kyzjWyR86dhvzw-Zhl0lVk_LOX3NciTac6qo,2914
163
163
  datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
164
- datachain-0.31.4.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
165
- datachain-0.31.4.dist-info/METADATA,sha256=wqjT5wVjclsvbSjyXxABcJ46-JKCGT5t8-MJK55VApM,13898
166
- datachain-0.31.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
167
- datachain-0.31.4.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
168
- datachain-0.31.4.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
169
- datachain-0.31.4.dist-info/RECORD,,
164
+ datachain-0.32.0.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
165
+ datachain-0.32.0.dist-info/METADATA,sha256=gLsH5khLc-z_s0MKTt3H6qk_UXDAaWuHy2lk1yytgBw,13898
166
+ datachain-0.32.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
167
+ datachain-0.32.0.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
168
+ datachain-0.32.0.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
169
+ datachain-0.32.0.dist-info/RECORD,,