datachain 0.14.2__py3-none-any.whl → 0.39.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datachain/__init__.py +20 -0
- datachain/asyn.py +11 -12
- datachain/cache.py +7 -7
- datachain/catalog/__init__.py +2 -2
- datachain/catalog/catalog.py +621 -507
- datachain/catalog/dependency.py +164 -0
- datachain/catalog/loader.py +28 -18
- datachain/checkpoint.py +43 -0
- datachain/cli/__init__.py +24 -33
- datachain/cli/commands/__init__.py +1 -8
- datachain/cli/commands/datasets.py +83 -52
- datachain/cli/commands/ls.py +17 -17
- datachain/cli/commands/show.py +4 -4
- datachain/cli/parser/__init__.py +8 -74
- datachain/cli/parser/job.py +95 -3
- datachain/cli/parser/studio.py +11 -4
- datachain/cli/parser/utils.py +1 -2
- datachain/cli/utils.py +2 -15
- datachain/client/azure.py +4 -4
- datachain/client/fsspec.py +45 -28
- datachain/client/gcs.py +6 -6
- datachain/client/hf.py +29 -2
- datachain/client/http.py +157 -0
- datachain/client/local.py +15 -11
- datachain/client/s3.py +17 -9
- datachain/config.py +4 -8
- datachain/data_storage/db_engine.py +12 -6
- datachain/data_storage/job.py +5 -1
- datachain/data_storage/metastore.py +1252 -186
- datachain/data_storage/schema.py +58 -45
- datachain/data_storage/serializer.py +105 -15
- datachain/data_storage/sqlite.py +286 -127
- datachain/data_storage/warehouse.py +250 -113
- datachain/dataset.py +353 -148
- datachain/delta.py +391 -0
- datachain/diff/__init__.py +27 -29
- datachain/error.py +60 -0
- datachain/func/__init__.py +2 -1
- datachain/func/aggregate.py +66 -42
- datachain/func/array.py +242 -38
- datachain/func/base.py +7 -4
- datachain/func/conditional.py +110 -60
- datachain/func/func.py +96 -45
- datachain/func/numeric.py +55 -38
- datachain/func/path.py +32 -20
- datachain/func/random.py +2 -2
- datachain/func/string.py +67 -37
- datachain/func/window.py +7 -8
- datachain/hash_utils.py +123 -0
- datachain/job.py +11 -7
- datachain/json.py +138 -0
- datachain/lib/arrow.py +58 -22
- datachain/lib/audio.py +245 -0
- datachain/lib/clip.py +14 -13
- datachain/lib/convert/flatten.py +5 -3
- datachain/lib/convert/python_to_sql.py +6 -10
- datachain/lib/convert/sql_to_python.py +8 -0
- datachain/lib/convert/values_to_tuples.py +156 -51
- datachain/lib/data_model.py +42 -20
- datachain/lib/dataset_info.py +36 -8
- datachain/lib/dc/__init__.py +8 -2
- datachain/lib/dc/csv.py +25 -28
- datachain/lib/dc/database.py +398 -0
- datachain/lib/dc/datachain.py +1289 -425
- datachain/lib/dc/datasets.py +320 -38
- datachain/lib/dc/hf.py +38 -24
- datachain/lib/dc/json.py +29 -32
- datachain/lib/dc/listings.py +112 -8
- datachain/lib/dc/pandas.py +16 -12
- datachain/lib/dc/parquet.py +35 -23
- datachain/lib/dc/records.py +31 -23
- datachain/lib/dc/storage.py +154 -64
- datachain/lib/dc/storage_pattern.py +251 -0
- datachain/lib/dc/utils.py +24 -16
- datachain/lib/dc/values.py +8 -9
- datachain/lib/file.py +622 -89
- datachain/lib/hf.py +69 -39
- datachain/lib/image.py +14 -14
- datachain/lib/listing.py +14 -11
- datachain/lib/listing_info.py +1 -2
- datachain/lib/meta_formats.py +3 -4
- datachain/lib/model_store.py +39 -7
- datachain/lib/namespaces.py +125 -0
- datachain/lib/projects.py +130 -0
- datachain/lib/pytorch.py +32 -21
- datachain/lib/settings.py +192 -56
- datachain/lib/signal_schema.py +427 -104
- datachain/lib/tar.py +1 -2
- datachain/lib/text.py +8 -7
- datachain/lib/udf.py +164 -76
- datachain/lib/udf_signature.py +60 -35
- datachain/lib/utils.py +118 -4
- datachain/lib/video.py +17 -9
- datachain/lib/webdataset.py +61 -56
- datachain/lib/webdataset_laion.py +15 -16
- datachain/listing.py +22 -10
- datachain/model/bbox.py +3 -1
- datachain/model/ultralytics/bbox.py +16 -12
- datachain/model/ultralytics/pose.py +16 -12
- datachain/model/ultralytics/segment.py +16 -12
- datachain/namespace.py +84 -0
- datachain/node.py +6 -6
- datachain/nodes_thread_pool.py +0 -1
- datachain/plugins.py +24 -0
- datachain/project.py +78 -0
- datachain/query/batch.py +40 -41
- datachain/query/dataset.py +604 -322
- datachain/query/dispatch.py +261 -154
- datachain/query/metrics.py +4 -6
- datachain/query/params.py +2 -3
- datachain/query/queue.py +3 -12
- datachain/query/schema.py +11 -6
- datachain/query/session.py +200 -33
- datachain/query/udf.py +34 -2
- datachain/remote/studio.py +171 -69
- datachain/script_meta.py +12 -12
- datachain/semver.py +68 -0
- datachain/sql/__init__.py +2 -0
- datachain/sql/functions/array.py +33 -1
- datachain/sql/postgresql_dialect.py +9 -0
- datachain/sql/postgresql_types.py +21 -0
- datachain/sql/sqlite/__init__.py +5 -1
- datachain/sql/sqlite/base.py +102 -29
- datachain/sql/sqlite/types.py +8 -13
- datachain/sql/types.py +70 -15
- datachain/studio.py +223 -46
- datachain/toolkit/split.py +31 -10
- datachain/utils.py +101 -59
- {datachain-0.14.2.dist-info → datachain-0.39.0.dist-info}/METADATA +77 -22
- datachain-0.39.0.dist-info/RECORD +173 -0
- {datachain-0.14.2.dist-info → datachain-0.39.0.dist-info}/WHEEL +1 -1
- datachain/cli/commands/query.py +0 -53
- datachain/query/utils.py +0 -42
- datachain-0.14.2.dist-info/RECORD +0 -158
- {datachain-0.14.2.dist-info → datachain-0.39.0.dist-info}/entry_points.txt +0 -0
- {datachain-0.14.2.dist-info → datachain-0.39.0.dist-info}/licenses/LICENSE +0 -0
- {datachain-0.14.2.dist-info → datachain-0.39.0.dist-info}/top_level.txt +0 -0
datachain/lib/hf.py
CHANGED
@@ -11,7 +11,7 @@ try:
         Image,
         IterableDataset,
         IterableDatasetDict,
-
+        List,
         Value,
         load_dataset,
     )
@@ -26,7 +26,7 @@ except ImportError as exc:
     ) from exc

 from io import BytesIO
-from typing import TYPE_CHECKING, Any,
+from typing import TYPE_CHECKING, Any, TypeAlias

 import PIL
 from tqdm.auto import tqdm
@@ -34,13 +34,16 @@ from tqdm.auto import tqdm
 from datachain.lib.arrow import arrow_type_mapper
 from datachain.lib.data_model import DataModel, DataType, dict_to_data_model
 from datachain.lib.udf import Generator
+from datachain.lib.utils import normalize_col_names

 if TYPE_CHECKING:
     import pyarrow as pa
     from pydantic import BaseModel


-HFDatasetType =
+HFDatasetType: TypeAlias = (
+    str | DatasetDict | Dataset | IterableDatasetDict | IterableDataset
+)


 class HFClassLabel(DataModel):
@@ -59,7 +62,6 @@ class HFImage(DataModel):


 class HFAudio(DataModel):
-    path: str
     array: list[float]
     sampling_rate: int

@@ -67,23 +69,27 @@ class HFAudio(DataModel):
 class HFGenerator(Generator):
     def __init__(
         self,
-        ds:
+        ds: HFDatasetType,
         output_schema: type["BaseModel"],
+        limit: int = 0,
         *args,
         **kwargs,
     ):
         """
-        Generator for chain from
+        Generator for chain from Hugging Face datasets.

         Parameters:

-
-
-
+        ds : Path or name of the dataset to read from Hugging Face Hub,
+            or an instance of `datasets.Dataset`-like object.
+        limit : Limit the number of items to read from the HF dataset.
+            Defaults to 0 (no limit).
+        output_schema : Pydantic model for validation.
         """
         super().__init__()
         self.ds = ds
         self.output_schema = output_schema
+        self.limit = limit
         self.args = args
         self.kwargs = kwargs

@@ -93,57 +99,81 @@ class HFGenerator(Generator):
     def process(self, split: str = ""):
         desc = "Parsed Hugging Face dataset"
         ds = self.ds_dict[split]
+        if self.limit > 0:
+            ds = ds.take(self.limit)
         if split:
             desc += f" split '{split}'"
+        model_fields = self.output_schema._model_fields_by_aliases()  # type: ignore[attr-defined]
         with tqdm(desc=desc, unit=" rows", leave=False) as pbar:
             for row in ds:
                 output_dict = {}
                 if split and "split" in self.output_schema.model_fields:
                     output_dict["split"] = split
                 for name, feat in ds.features.items():
-
-
+                    normalized_name, info = model_fields[name]
+                    anno = info.annotation
+                    output_dict[normalized_name] = convert_feature(
+                        row[name], feat, anno
+                    )
                 yield self.output_schema(**output_dict)
                 pbar.update(1)


-def stream_splits(ds:
+def stream_splits(ds: HFDatasetType, *args, **kwargs):
     if isinstance(ds, str):
-        kwargs["streaming"] = True
         ds = load_dataset(ds, *args, **kwargs)
     if isinstance(ds, (DatasetDict, IterableDatasetDict)):
         return ds
     return {"": ds}


-def convert_feature(val: Any, feat: Any, anno: Any) -> Any:
-    if isinstance(feat, (Value, Array2D, Array3D, Array4D, Array5D)):
+def convert_feature(val: Any, feat: Any, anno: Any) -> Any:
+    if isinstance(feat, (Value, Array2D, Array3D, Array4D, Array5D, List)):
         return val
     if isinstance(feat, ClassLabel):
         return HFClassLabel(string=feat.names[val], integer=val)
-    if isinstance(feat,
-
-
-
-
-
-
-
-
+    if isinstance(feat, dict):
+        sdict = {}
+        model_fields = anno._model_fields_by_aliases()  # type: ignore[attr-defined]
+        for sname in val:
+            sfeat = feat[sname]
+            norm_name, info = model_fields[sname]
+            sanno = info.annotation
+            if isinstance(val[sname], list):
+                sdict[norm_name] = [
+                    convert_feature(v, sfeat, sanno) for v in val[sname]
+                ]
+            else:
+                sdict[norm_name] = convert_feature(val[sname], sfeat, sanno)
+        return anno(**sdict)
     if isinstance(feat, Image):
         if isinstance(val, dict):
             return HFImage(img=val["bytes"])
         return HFImage(img=image_to_bytes(val))
     if isinstance(feat, Audio):
-        return HFAudio(
-
-
-def get_output_schema(
-
+        return HFAudio(array=val["array"], sampling_rate=val["sampling_rate"])
+
+
+def get_output_schema(
+    features: Features, existing_column_names: list[str] | None = None
+) -> tuple[dict[str, DataType], dict[str, str]]:
+    """
+    Generate UDF output schema from Hugging Face datasets features. It normalizes the
+    column names and returns a mapping of normalized names to original names along with
+    the data types. `existing_column_names` is the list of column names that already
+    exist in the dataset (to avoid name collisions due to normalization).
+    """
+    existing_column_names = existing_column_names or []
     fields_dict = {}
-
-
-
+    normalized_names = normalize_col_names(
+        existing_column_names + list(features.keys())
+    )
+    # List of tuple(str, str) for HF dataset feature names, (normalized, original)
+    new_feature_names = list(normalized_names.items())[len(existing_column_names) :]
+    for idx, feat in enumerate(features.items()):
+        name, val = feat
+        fields_dict[new_feature_names[idx][0]] = _feature_to_chain_type(name, val)
+    return fields_dict, normalized_names


 def _feature_to_chain_type(name: str, val: Any) -> DataType:  # noqa: PLR0911
@@ -151,13 +181,13 @@ def _feature_to_chain_type(name: str, val: Any) -> DataType:  # noqa: PLR0911
         return arrow_type_mapper(val.pa_type)
     if isinstance(val, ClassLabel):
         return HFClassLabel
-    if isinstance(val,
-
-
-
-
-
-
+    if isinstance(val, dict):
+        sequence_dict = {}
+        for sname, sval in val.items():
+            dtype = _feature_to_chain_type(sname, sval)
+            sequence_dict[sname] = dtype  # type: ignore[valid-type]
+        return dict_to_data_model(f"HFDataModel_{name}", sequence_dict)  # type: ignore[arg-type]
+    if isinstance(val, List):
         return list[_feature_to_chain_type(name, val.feature)]  # type: ignore[arg-type,misc,return-value]
     if isinstance(val, Array2D):
         dtype = arrow_type_mapper(string_to_arrow(val.dtype))
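The functional changes above are the new `limit` argument on `HFGenerator` and the reworked `get_output_schema`, which now also returns a mapping of normalized column names. A minimal sketch of how these pieces might fit together, assuming a streaming Hugging Face dataset; the dataset and model names below are illustrative, not part of the diff:

```python
# Sketch only: wires together the signatures visible in this diff.
from datasets import load_dataset

from datachain.lib.data_model import dict_to_data_model
from datachain.lib.hf import HFGenerator, get_output_schema

# Illustrative dataset; any HF dataset with resolved `features` would do.
ds = load_dataset("rotten_tomatoes", split="train", streaming=True)

fields, normalized_names = get_output_schema(ds.features)
row_model = dict_to_data_model("HFRow", fields)  # "HFRow" is an arbitrary name

# limit=10 makes process() call ds.take(10) before iterating rows.
gen = HFGenerator(ds, row_model, limit=10)
```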
datachain/lib/image.py
CHANGED
@@ -1,4 +1,4 @@
-from
+from collections.abc import Callable

 import torch
 from PIL import Image as PILImage
@@ -6,7 +6,7 @@ from PIL import Image as PILImage
 from datachain.lib.file import File, FileError, Image, ImageFile


-def image_info(file:
+def image_info(file: File | ImageFile) -> Image:
     """
     Returns image file information.

@@ -19,7 +19,7 @@ def image_info(file: Union[File, ImageFile]) -> Image:
     try:
         img = file.as_image_file().read()
     except Exception as exc:
-        raise FileError(
+        raise FileError("unable to open image file", file.source, file.path) from exc

     return Image(
         width=img.width,
@@ -31,11 +31,11 @@ def image_info(file: Union[File, ImageFile]) -> Image:
 def convert_image(
     img: PILImage.Image,
     mode: str = "RGB",
-    size:
-    transform:
-    encoder:
-    device:
-) ->
+    size: tuple[int, int] | None = None,
+    transform: Callable | None = None,
+    encoder: Callable | None = None,
+    device: str | torch.device | None = None,
+) -> PILImage.Image | torch.Tensor:
     """
     Resize, transform, and otherwise convert an image.

@@ -71,13 +71,13 @@ def convert_image(


 def convert_images(
-    images:
+    images: PILImage.Image | list[PILImage.Image],
     mode: str = "RGB",
-    size:
-    transform:
-    encoder:
-    device:
-) ->
+    size: tuple[int, int] | None = None,
+    transform: Callable | None = None,
+    encoder: Callable | None = None,
+    device: str | torch.device | None = None,
+) -> list[PILImage.Image] | torch.Tensor:
     """
     Resize, transform, and otherwise convert one or more images.

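The image helpers above only change their annotations (PEP 604 unions instead of `Optional`/`Union`) and the `FileError` message; behavior is otherwise unchanged. A small usage sketch of `convert_image` with the optional arguments shown in the new signature:

```python
from PIL import Image as PILImage

from datachain.lib.image import convert_image

img = PILImage.new("RGB", (256, 256))
# With no transform or encoder, a converted and resized PIL image is returned.
thumb = convert_image(img, mode="L", size=(64, 64))
```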
datachain/lib/listing.py
CHANGED
@@ -2,10 +2,10 @@ import glob
 import logging
 import os
 import posixpath
-from collections.abc import Iterator
+from collections.abc import Callable, Iterator
 from contextlib import contextmanager
 from datetime import datetime, timedelta, timezone
-from typing import TYPE_CHECKING,
+from typing import TYPE_CHECKING, TypeVar

 from fsspec.asyn import get_loop
 from sqlalchemy.sql.expression import true
@@ -56,6 +56,8 @@ def list_bucket(uri: str, cache, client_config=None) -> Callable:
         for entries in iter_over_async(client.scandir(path.rstrip("/")), get_loop()):
             yield from entries

+    list_func.__name__ = "read_storage"
+
     return list_func


@@ -71,8 +73,8 @@ def get_file_info(uri: str, cache, client_config=None) -> File:
 def ls(
     dc: D,
     path: str,
-    recursive:
-
+    recursive: bool | None = True,
+    column="file",
 ) -> D:
     """
     Return files by some path from DataChain instance which contains bucket listing.
@@ -82,7 +84,7 @@ def ls(
     """

     def _file_c(name: str) -> Column:
-        return Column(f"{
+        return Column(f"{column}.{name}")

     dc = dc.filter(_file_c("is_latest") == true())

@@ -105,11 +107,10 @@ def ls(
     return dc.filter(pathfunc.parent(_file_c("path")) == path.lstrip("/").rstrip("/*"))


-def parse_listing_uri(uri: str
+def parse_listing_uri(uri: str) -> tuple[str, str, str]:
     """
     Parsing uri and returns listing dataset name, listing uri and listing path
     """
-    client_config = client_config or {}
     storage_uri, path = Client.parse_url(uri)
     if uses_glob(path):
         lst_uri_path = posixpath.dirname(path)
@@ -122,6 +123,9 @@ def parse_listing_uri(uri: str, client_config) -> tuple[str, str, str]:
         f"{LISTING_PREFIX}{storage_uri}/{posixpath.join(lst_uri_path, '').lstrip('/')}"
     )

+    # we should remove dots from the name
+    ds_name = ds_name.replace(".", "_")
+
     return ds_name, lst_uri, path


@@ -146,8 +150,8 @@ def _reraise_as_client_error() -> Iterator[None]:


 def get_listing(
-    uri:
-) -> tuple[
+    uri: str | os.PathLike[str], session: "Session", update: bool = False
+) -> tuple[str | None, str, str, bool]:
     """Returns correct listing dataset name that must be used for saving listing
     operation. It takes into account existing listings and reusability of those.
     It also returns boolean saying if returned dataset name is reused / already
@@ -173,7 +177,7 @@ def get_listing(
         _, path = Client.parse_url(uri)
         return None, uri, path, False

-    ds_name, list_uri, list_path = parse_listing_uri(uri
+    ds_name, list_uri, list_path = parse_listing_uri(uri)
     listing = None
     listings = [
         ls for ls in catalog.listings() if not ls.is_expired and ls.contains(ds_name)
@@ -194,5 +198,4 @@ def get_listing(
         list_path = f"{ds_name.strip('/').removeprefix(listing.name)}/{list_path}"

     ds_name = listing.name if listing else ds_name
-
     return ds_name, list_uri, list_path, bool(listing)
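Note that `parse_listing_uri` no longer takes a `client_config` argument and now replaces dots in the derived listing dataset name. A hedged sketch of the updated call; the bucket URI is illustrative and assumes the matching fsspec backend is installed:

```python
from datachain.lib.listing import parse_listing_uri

# Returns the listing dataset name (with "." replaced by "_"), the listed URI prefix,
# and the remaining path/glob part.
ds_name, listing_uri, path = parse_listing_uri("s3://my-bucket/images/")
```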
datachain/lib/listing_info.py
CHANGED
@@ -1,5 +1,4 @@
 from datetime import datetime, timedelta, timezone
-from typing import Optional

 from datachain.client import Client
 from datachain.lib.dataset_info import DatasetInfo
@@ -17,7 +16,7 @@ class ListingInfo(DatasetInfo):
         return uri

     @property
-    def expires(self) ->
+    def expires(self) -> datetime | None:
         if not self.finished_at:
             return None
         return self.finished_at + timedelta(seconds=LISTING_TTL)
datachain/lib/meta_formats.py
CHANGED
@@ -1,14 +1,13 @@
 import csv
-import json
 import tempfile
 import uuid
-from collections.abc import Iterator
+from collections.abc import Callable, Iterator
 from pathlib import Path
-from typing import Callable

 import jmespath as jsp
 from pydantic import BaseModel, ConfigDict, Field, ValidationError  # noqa: F401

+from datachain import json
 from datachain.lib.data_model import DataModel  # noqa: F401
 from datachain.lib.file import TextFile

@@ -106,7 +105,7 @@ def read_meta(  # noqa: C901
     from datachain import read_storage

     if schema_from:
-        file =
+        file = read_storage(schema_from, type="text").limit(1).to_values("file")[0]
         model_code = gen_datamodel_code(
             file, format=format, jmespath=jmespath, model_name=model_name
         )
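The schema-inference path now pulls its sample file through the regular chain API instead of the plain `json` module helper. A sketch of that `read_storage(...).limit(1).to_values("file")` pattern on its own, with a purely illustrative URI:

```python
import datachain as dc

# Grab the first file object from a (hypothetical) location to infer a schema from.
first_file = (
    dc.read_storage("s3://my-bucket/meta/", type="text")
    .limit(1)
    .to_values("file")[0]
)
```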
datachain/lib/model_store.py
CHANGED
@@ -1,11 +1,8 @@
 import inspect
-import
-from typing import Any, ClassVar, Optional
+from typing import Any, ClassVar

 from pydantic import BaseModel

-logger = logging.getLogger(__name__)
-

 class ModelStore:
     store: ClassVar[dict[str, dict[int, type[BaseModel]]]] = {}
@@ -14,7 +11,7 @@ class ModelStore:
     def get_version(cls, model: type[BaseModel]) -> int:
         if not hasattr(model, "_version"):
             return 0
-        return model._version
+        return model._version  # type: ignore[attr-defined]

     @classmethod
     def get_name(cls, model) -> str:
@@ -39,7 +36,7 @@ class ModelStore:
         cls.register(anno)

     @classmethod
-    def get(cls, name: str, version:
+    def get(cls, name: str, version: int | None = None) -> type | None:
         class_dict = cls.store.get(name, None)
         if class_dict is None:
             return None
@@ -77,7 +74,42 @@ class ModelStore:
         )

     @staticmethod
-    def to_pydantic(val) ->
+    def to_pydantic(val) -> type[BaseModel] | None:
         if val is None or not ModelStore.is_pydantic(val):
             return None
         return val
+
+    @staticmethod
+    def is_partial(parent_type) -> bool:
+        return (
+            parent_type
+            and ModelStore.is_pydantic(parent_type)
+            and "@" in ModelStore.get_name(parent_type)
+        )
+
+    @classmethod
+    def rebuild_all(cls) -> None:
+        """Ensure pydantic schemas are (re)built for all registered models.
+
+        Uses ``force=True`` to avoid subtle cases where a deserialized class
+        (e.g. from by-value cloudpickle in workers) reports built state but
+        nested model field schemas aren't fully resolved yet.
+        """
+        visited: set[type[BaseModel]] = set()
+        visiting: set[type[BaseModel]] = set()
+
+        def visit(model: type[BaseModel]) -> None:
+            if model in visited or model in visiting:
+                return
+            visiting.add(model)
+            for field in model.model_fields.values():
+                child = cls.to_pydantic(field.annotation)
+                if child is not None:
+                    visit(child)
+            visiting.remove(model)
+            model.model_rebuild(force=True)
+            visited.add(model)
+
+        for versions in cls.store.values():
+            for model in versions.values():
+                visit(model)
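The new `ModelStore.rebuild_all()` walks every registered model plus the pydantic models nested in its fields and forces a `model_rebuild()` on each. A minimal sketch, using example models that are not part of the diff:

```python
from pydantic import BaseModel

from datachain.lib.model_store import ModelStore


class Inner(BaseModel):  # example model, not from the diff
    x: int


class Outer(BaseModel):  # example model, not from the diff
    inner: Inner


ModelStore.register(Outer)
ModelStore.rebuild_all()  # rebuilds Outer and, via its field annotation, Inner
```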
datachain/lib/namespaces.py
ADDED
@@ -0,0 +1,125 @@
+from datachain.error import (
+    NamespaceCreateNotAllowedError,
+    NamespaceDeleteNotAllowedError,
+)
+from datachain.lib.projects import delete as delete_project
+from datachain.namespace import Namespace, parse_name
+from datachain.query import Session
+
+
+def create(
+    name: str, descr: str | None = None, session: Session | None = None
+) -> Namespace:
+    """
+    Creates a new namespace.
+
+    Namespaces organize projects, which in turn organize datasets. A default
+    namespace always exists and is used if none is specified. Multiple namespaces
+    can be created in Studio, but only the default is available in the CLI.
+
+    Parameters:
+        name: Name of the new namespace.
+        descr: Optional description of the namespace.
+        session: Optional session to use for the operation.
+
+    Example:
+        ```py
+        from datachain.lib.namespaces import create as create_namespace
+        namespace = create_namespace("dev", "Dev namespace")
+        ```
+    """
+    session = Session.get(session)
+
+    from datachain.lib.dc.utils import is_studio
+
+    if not is_studio():
+        raise NamespaceCreateNotAllowedError("Creating namespace is not allowed")
+
+    Namespace.validate_name(name)
+
+    return session.catalog.metastore.create_namespace(name, descr)
+
+
+def get(name: str, session: Session | None = None) -> Namespace:
+    """
+    Gets a namespace by name.
+    If the namespace is not found, a `NamespaceNotFoundError` is raised.
+
+    Parameters:
+        name : The name of the namespace.
+        session : Session to use for getting namespace.
+
+    Example:
+        ```py
+        import datachain as dc
+        namespace = dc.get_namespace("local")
+        ```
+    """
+    session = Session.get(session)
+    return session.catalog.metastore.get_namespace(name)
+
+
+def ls(session: Session | None = None) -> list[Namespace]:
+    """
+    Gets a list of all namespaces.
+
+    Parameters:
+        session : Session to use for getting namespaces.
+
+    Example:
+        ```py
+        from datachain.lib.namespaces import ls as ls_namespaces
+        namespaces = ls_namespaces()
+        ```
+    """
+    return Session.get(session).catalog.metastore.list_namespaces()
+
+
+def delete_namespace(name: str, session: Session | None = None) -> None:
+    """
+    Removes a namespace by name.
+
+    Raises:
+        NamespaceNotFoundError: If the namespace does not exist.
+        NamespaceDeleteNotAllowedError: If the namespace is non-empty,
+            is the default namespace, or is a system namespace,
+            as these cannot be removed.
+
+    Parameters:
+        name: The name of the namespace.
+        session: Session to use for getting project.
+
+    Example:
+        ```py
+        import datachain as dc
+        dc.delete_namespace("dev")
+        ```
+    """
+    session = Session.get(session)
+    metastore = session.catalog.metastore
+
+    namespace_name, project_name = parse_name(name)
+
+    if project_name:
+        return delete_project(project_name, namespace_name, session)
+
+    namespace = metastore.get_namespace(name)
+
+    if name == metastore.system_namespace_name:
+        raise NamespaceDeleteNotAllowedError(
+            f"Namespace {metastore.system_namespace_name} cannot be removed"
+        )
+
+    if name == metastore.default_namespace_name:
+        raise NamespaceDeleteNotAllowedError(
+            f"Namespace {metastore.default_namespace_name} cannot be removed"
+        )
+
+    num_projects = metastore.count_projects(namespace.id)
+    if num_projects > 0:
+        raise NamespaceDeleteNotAllowedError(
+            f"Namespace cannot be removed. It contains {num_projects} project(s). "
+            "Please remove the project(s) first."
+        )
+
+    metastore.remove_namespace(namespace.id)