datachain 0.11.11__py3-none-any.whl → 0.13.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- datachain/catalog/catalog.py +39 -7
- datachain/catalog/loader.py +19 -13
- datachain/cli/__init__.py +2 -1
- datachain/cli/commands/ls.py +8 -6
- datachain/cli/commands/show.py +7 -0
- datachain/cli/parser/studio.py +13 -1
- datachain/client/fsspec.py +12 -16
- datachain/client/gcs.py +1 -1
- datachain/client/hf.py +36 -14
- datachain/client/local.py +1 -4
- datachain/client/s3.py +1 -1
- datachain/data_storage/metastore.py +6 -0
- datachain/data_storage/warehouse.py +3 -8
- datachain/dataset.py +8 -0
- datachain/error.py +0 -12
- datachain/fs/utils.py +30 -0
- datachain/func/__init__.py +5 -0
- datachain/func/func.py +2 -1
- datachain/lib/dc.py +59 -15
- datachain/lib/file.py +63 -18
- datachain/lib/image.py +30 -6
- datachain/lib/listing.py +21 -39
- datachain/lib/meta_formats.py +2 -2
- datachain/lib/signal_schema.py +65 -18
- datachain/lib/udf.py +3 -0
- datachain/lib/udf_signature.py +17 -9
- datachain/lib/video.py +7 -5
- datachain/model/bbox.py +209 -58
- datachain/model/pose.py +49 -37
- datachain/model/segment.py +22 -18
- datachain/model/ultralytics/bbox.py +9 -9
- datachain/model/ultralytics/pose.py +7 -7
- datachain/model/ultralytics/segment.py +7 -7
- datachain/model/utils.py +191 -0
- datachain/query/dataset.py +8 -2
- datachain/sql/sqlite/base.py +2 -2
- datachain/studio.py +8 -6
- datachain/utils.py +0 -16
- {datachain-0.11.11.dist-info → datachain-0.13.0.dist-info}/METADATA +4 -2
- {datachain-0.11.11.dist-info → datachain-0.13.0.dist-info}/RECORD +44 -42
- {datachain-0.11.11.dist-info → datachain-0.13.0.dist-info}/WHEEL +1 -1
- {datachain-0.11.11.dist-info → datachain-0.13.0.dist-info}/LICENSE +0 -0
- {datachain-0.11.11.dist-info → datachain-0.13.0.dist-info}/entry_points.txt +0 -0
- {datachain-0.11.11.dist-info → datachain-0.13.0.dist-info}/top_level.txt +0 -0
datachain/catalog/catalog.py
CHANGED
@@ -25,7 +25,6 @@ from typing import (
 )
 from uuid import uuid4
 
-import requests
 import sqlalchemy as sa
 from sqlalchemy import Column
 from tqdm.auto import tqdm
@@ -54,7 +53,6 @@ from datachain.error import (
 from datachain.lib.listing import get_listing
 from datachain.node import DirType, Node, NodeWithPath
 from datachain.nodes_thread_pool import NodesThreadPool
-from datachain.remote.studio import StudioClient
 from datachain.sql.types import DateTime, SQLType
 from datachain.utils import DataChainDir
 
@@ -162,6 +160,8 @@ class DatasetRowsFetcher(NodesThreadPool):
         max_threads: int = PULL_DATASET_MAX_THREADS,
         progress_bar=None,
     ):
+        from datachain.remote.studio import StudioClient
+
         super().__init__(max_threads)
         self._check_dependencies()
         self.metastore = metastore
@@ -234,6 +234,8 @@ class DatasetRowsFetcher(NodesThreadPool):
         return df.drop("sys__id", axis=1)
 
     def get_parquet_content(self, url: str):
+        import requests
+
         while True:
             if self.should_check_for_status():
                 self.check_for_status()
@@ -775,6 +777,8 @@ class Catalog:
         validate_version: Optional[bool] = True,
         listing: Optional[bool] = False,
         uuid: Optional[str] = None,
+        description: Optional[str] = None,
+        labels: Optional[list[str]] = None,
     ) -> "DatasetRecord":
         """
         Creates new dataset of a specific version.
@@ -801,6 +805,8 @@ class Catalog:
             query_script=query_script,
             schema=schema,
             ignore_if_exists=True,
+            description=description,
+            labels=labels,
         )
 
         version = version or default_version
@@ -1130,6 +1136,8 @@ class Catalog:
         raise DatasetNotFoundError(f"Dataset with version uuid {uuid} not found.")
 
     def get_remote_dataset(self, name: str) -> DatasetRecord:
+        from datachain.remote.studio import StudioClient
+
         studio_client = StudioClient()
 
         info_response = studio_client.dataset_info(name)
@@ -1164,8 +1172,27 @@ class Catalog:
 
         return direct_dependencies
 
-    def ls_datasets(
-
+    def ls_datasets(
+        self, include_listing: bool = False, studio: bool = False
+    ) -> Iterator[DatasetListRecord]:
+        from datachain.remote.studio import StudioClient
+
+        if studio:
+            client = StudioClient()
+            response = client.ls_datasets()
+            if not response.ok:
+                raise DataChainError(response.message)
+            if not response.data:
+                return
+
+            datasets: Iterator[DatasetListRecord] = (
+                DatasetListRecord.from_dict(d)
+                for d in response.data
+                if not d.get("name", "").startswith(QUERY_DATASET_PREFIX)
+            )
+        else:
+            datasets = self.metastore.list_datasets()
+
         for d in datasets:
             if not d.is_bucket_listing or include_listing:
                 yield d
@@ -1173,9 +1200,12 @@ class Catalog:
     def list_datasets_versions(
         self,
         include_listing: bool = False,
+        studio: bool = False,
     ) -> Iterator[tuple[DatasetListRecord, "DatasetListVersion", Optional["Job"]]]:
         """Iterate over all dataset versions with related jobs."""
-        datasets = list(
+        datasets = list(
+            self.ls_datasets(include_listing=include_listing, studio=studio)
+        )
 
         # preselect dataset versions jobs from db to avoid multiple queries
         jobs_ids: set[str] = {
@@ -1345,6 +1375,8 @@ class Catalog:
         if cp and not output:
             raise ValueError("Please provide output directory for instantiation")
 
+        from datachain.remote.studio import StudioClient
+
         studio_client = StudioClient()
 
         try:
@@ -1580,7 +1612,7 @@ class Catalog:
         except TerminationSignal as exc:
             signal.signal(signal.SIGTERM, orig_sigterm_handler)
             signal.signal(signal.SIGINT, orig_sigint_handler)
-
+            logger.info("Shutting down process %s, received %r", proc.pid, exc)
             # Rather than forwarding the signal to the child, we try to shut it down
             # gracefully. This is because we consider the script to be interactive
             # and special, so we give it time to cleanup before exiting.
@@ -1595,7 +1627,7 @@ class Catalog:
         if thread:
             thread.join()  # wait for the reader thread
 
-
+        logger.info("Process %s exited with return code %s", proc.pid, proc.returncode)
         if proc.returncode == QUERY_SCRIPT_CANCELED_EXIT_CODE:
             raise QueryScriptCancelError(
                 "Query script was canceled by user",
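With the change above, `Catalog.ls_datasets()` can enumerate datasets either from the local metastore or from Studio. A minimal sketch of the new flag in use (assumes a Studio token is already configured; only attributes that appear elsewhere in this diff are referenced):

    from datachain.catalog import get_catalog

    catalog = get_catalog()
    # studio=True goes through StudioClient.ls_datasets(); the default reads the local metastore.
    for ds in catalog.ls_datasets(studio=True):
        print(ds.name, len(ds.versions))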
datachain/catalog/loader.py
CHANGED
@@ -1,19 +1,13 @@
 import os
 from importlib import import_module
-from typing import Any, Optional
-
-from datachain.catalog import Catalog
-from datachain.data_storage import (
-    AbstractMetastore,
-    AbstractWarehouse,
-)
-from datachain.data_storage.serializer import deserialize
-from datachain.data_storage.sqlite import (
-    SQLiteMetastore,
-    SQLiteWarehouse,
-)
+from typing import TYPE_CHECKING, Any, Optional
+
 from datachain.utils import get_envs_by_prefix
 
+if TYPE_CHECKING:
+    from datachain.catalog import Catalog
+    from datachain.data_storage import AbstractMetastore, AbstractWarehouse
+
 METASTORE_SERIALIZED = "DATACHAIN__METASTORE"
 METASTORE_IMPORT_PATH = "DATACHAIN_METASTORE"
 METASTORE_ARG_PREFIX = "DATACHAIN_METASTORE_ARG_"
@@ -27,6 +21,9 @@ IN_MEMORY_ERROR_MESSAGE = "In-memory is only supported on SQLite"
 
 
 def get_metastore(in_memory: bool = False) -> "AbstractMetastore":
+    from datachain.data_storage import AbstractMetastore
+    from datachain.data_storage.serializer import deserialize
+
     metastore_serialized = os.environ.get(METASTORE_SERIALIZED)
     if metastore_serialized:
         metastore_obj = deserialize(metastore_serialized)
@@ -45,6 +42,8 @@ def get_metastore(in_memory: bool = False) -> "AbstractMetastore":
     }
 
     if not metastore_import_path:
+        from datachain.data_storage.sqlite import SQLiteMetastore
+
         metastore_args["in_memory"] = in_memory
         return SQLiteMetastore(**metastore_args)
     if in_memory:
@@ -62,6 +61,9 @@ def get_metastore(in_memory: bool = False) -> "AbstractMetastore":
 
 
 def get_warehouse(in_memory: bool = False) -> "AbstractWarehouse":
+    from datachain.data_storage import AbstractWarehouse
+    from datachain.data_storage.serializer import deserialize
+
     warehouse_serialized = os.environ.get(WAREHOUSE_SERIALIZED)
     if warehouse_serialized:
         warehouse_obj = deserialize(warehouse_serialized)
@@ -80,6 +82,8 @@ def get_warehouse(in_memory: bool = False) -> "AbstractWarehouse":
     }
 
     if not warehouse_import_path:
+        from datachain.data_storage.sqlite import SQLiteWarehouse
+
         warehouse_args["in_memory"] = in_memory
         return SQLiteWarehouse(**warehouse_args)
     if in_memory:
@@ -121,7 +125,7 @@ def get_distributed_class(**kwargs):
 
 def get_catalog(
     client_config: Optional[dict[str, Any]] = None, in_memory: bool = False
-) -> Catalog:
+) -> "Catalog":
     """
     Function that creates Catalog instance with appropriate metastore
     and warehouse classes. Metastore class can be provided with env variable
@@ -133,6 +137,8 @@ def get_catalog(
     and name of variable after, e.g. if it accepts team_id as kwargs
     we can provide DATACHAIN_METASTORE_ARG_TEAM_ID=12345 env variable.
    """
+    from datachain.catalog import Catalog
+
     return Catalog(
         metastore=get_metastore(in_memory=in_memory),
         warehouse=get_warehouse(in_memory=in_memory),
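As the `get_catalog` docstring notes, the metastore and warehouse classes are resolved from environment variables, now with all heavy imports deferred until they are needed. A hedged sketch of that wiring (the import path and `team_id` argument are illustrative, mirroring the docstring's own example):

    import os

    from datachain.catalog import get_catalog

    # Hypothetical custom metastore; the class is imported lazily inside get_metastore().
    os.environ["DATACHAIN_METASTORE"] = "my_pkg.metastore.MyMetastore"
    os.environ["DATACHAIN_METASTORE_ARG_TEAM_ID"] = "12345"  # forwarded as team_id=...

    catalog = get_catalog(in_memory=False)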
datachain/cli/__init__.py
CHANGED
@@ -6,7 +6,6 @@ from multiprocessing import freeze_support
 from typing import Optional
 
 from datachain.cli.utils import get_logging_level
-from datachain.telemetry import telemetry
 
 from .commands import (
     clear_cache,
@@ -70,6 +69,8 @@ def main(argv: Optional[list[str]] = None) -> int:
         error, return_code = handle_general_exception(exc, args, logging_level)
         return return_code
     finally:
+        from datachain.telemetry import telemetry
+
         telemetry.send_cli_call(args.command, error=error)
 
 
datachain/cli/commands/ls.py
CHANGED
@@ -38,11 +38,12 @@ def ls_local(
 ):
     from datachain import DataChain
 
-    if catalog is None:
-        from datachain.catalog import get_catalog
-
-        catalog = get_catalog(client_config=client_config)
     if sources:
+        if catalog is None:
+            from datachain.catalog import get_catalog
+
+            catalog = get_catalog(client_config=client_config)
+
         actual_sources = list(ls_urls(sources, catalog=catalog, long=long, **kwargs))
         if len(actual_sources) == 1:
             for _, entries in actual_sources:
@@ -61,8 +62,9 @@
                 for entry in entries:
                     print(format_ls_entry(entry))
     else:
-
-
+        # Collect results in a list here to prevent interference from `tqdm` and `print`
+        listing = list(DataChain.listings().collect("listing"))
+        for ls in listing:
             print(format_ls_entry(f"{ls.uri}@v{ls.version}"))  # type: ignore[union-attr]
 
 
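The new `else` branch relies on the public listings API; the equivalent standalone snippet, materializing results first so `tqdm` progress output does not interleave with `print`:

    from datachain import DataChain

    for ls in list(DataChain.listings().collect("listing")):
        print(f"{ls.uri}@v{ls.version}")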
datachain/cli/commands/show.py
CHANGED
@@ -40,6 +40,13 @@ def show(
         .offset(offset)
     )
     records = query.to_db_records()
+    print("Name: ", name)
+    if dataset.description:
+        print("Description: ", dataset.description)
+    if dataset.labels:
+        print("Labels: ", ",".join(dataset.labels))
+    print("\n")
+
     show_records(records, collapse_columns=not no_collapse, hidden_fields=hidden_fields)
 
     if schema and dataset_version.feature_schema:
datachain/cli/parser/studio.py
CHANGED
@@ -63,19 +63,31 @@ def add_auth_parser(subparsers, parent_parser) -> None:
         default=False,
         help="Use code-based authentication without browser",
     )
+    login_parser.add_argument(
+        "--local",
+        action="store_true",
+        default=False,
+        help="Save the token in the local project config",
+    )
 
     auth_logout_help = "Log out from Studio"
     auth_logout_description = (
         "Remove the Studio authentication token from global config."
     )
 
-    auth_subparser.add_parser(
+    logout_parser = auth_subparser.add_parser(
        "logout",
         parents=[parent_parser],
         description=auth_logout_description,
         help=auth_logout_help,
         formatter_class=CustomHelpFormatter,
     )
+    logout_parser.add_argument(
+        "--local",
+        action="store_true",
+        default=False,
+        help="Remove the token from the local project config",
+    )
 
     auth_team_help = "Set default team for Studio operations"
     auth_team_description = "Set the default team for Studio operations."
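Assuming the existing `auth` subcommand wiring, the new flag would be invoked roughly as `datachain auth login --local` (save the Studio token in the project-local config) and `datachain auth logout --local` (remove only that local copy); without `--local`, both continue to operate on the global config as before.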
datachain/client/fsspec.py
CHANGED
@@ -17,10 +17,10 @@ from typing import (
     ClassVar,
     NamedTuple,
     Optional,
+    Union,
 )
 from urllib.parse import urlparse
 
-from botocore.exceptions import ClientError
 from dvc_objects.fs.system import reflink
 from fsspec.asyn import get_loop, sync
 from fsspec.callbacks import DEFAULT_CALLBACK, Callback
@@ -28,7 +28,6 @@ from tqdm.auto import tqdm
 
 from datachain.cache import Cache
 from datachain.client.fileslice import FileWrapper
-from datachain.error import ClientError as DataChainClientError
 from datachain.nodes_fetcher import NodesFetcher
 from datachain.nodes_thread_pool import NodeChunk
 
@@ -83,19 +82,17 @@ class Client(ABC):
         self.uri = self.get_uri(self.name)
 
     @staticmethod
-    def get_implementation(url: str) -> type["Client"]:
+    def get_implementation(url: Union[str, os.PathLike[str]]) -> type["Client"]:
         from .azure import AzureClient
         from .gcs import GCSClient
         from .hf import HfClient
         from .local import FileClient
         from .s3 import ClientS3
 
-        protocol = urlparse(url).scheme
+        protocol = urlparse(str(url)).scheme
 
-        if not protocol or _is_win_local_path(url):
+        if not protocol or _is_win_local_path(str(url)):
             return FileClient
-
-        protocol = protocol.lower()
         if protocol == ClientS3.protocol:
             return ClientS3
         if protocol == GCSClient.protocol:
@@ -121,9 +118,11 @@ class Client(ABC):
         return cls.get_uri(storage_name), rel_path
 
     @staticmethod
-    def get_client(
+    def get_client(
+        source: Union[str, os.PathLike[str]], cache: Cache, **kwargs
+    ) -> "Client":
         cls = Client.get_implementation(source)
-        storage_url, _ = cls.split_url(source)
+        storage_url, _ = cls.split_url(str(source))
         if os.name == "nt":
             storage_url = storage_url.removeprefix("/")
 
@@ -209,7 +208,7 @@ class Client(ABC):
 
     async def get_current_etag(self, file: "File") -> str:
         kwargs = {}
-        if self.fs
+        if getattr(self.fs, "version_aware", False):
             kwargs["version_id"] = file.version
         info = await self.fs._info(
             self.get_full_path(file.path, file.version), **kwargs
@@ -286,11 +285,6 @@ class Client(ABC):
                     worker.cancel()
                 if excs:
                     raise excs[0]
-        except ClientError as exc:
-            raise DataChainClientError(
-                exc.response.get("Error", {}).get("Message") or exc,
-                exc.response.get("Error", {}).get("Code"),
-            ) from exc
         finally:
             # This ensures the progress bar is closed before any exceptions are raised
             progress_bar.close()
@@ -333,7 +327,9 @@ class Client(ABC):
         return not (key.startswith("/") or key.endswith("/") or "//" in key)
 
     async def ls_dir(self, path):
-
+        if getattr(self.fs, "version_aware", False):
+            kwargs = {"versions": True}
+        return await self.fs._ls(path, detail=True, **kwargs)
 
     def rel_path(self, path: str) -> str:
         return self.fs.split_path(path)[1]
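`get_implementation` and `get_client` now accept `os.PathLike` objects as well as strings. A small sketch of what that enables (only client classes named in this diff are referenced):

    from pathlib import Path

    from datachain.client.fsspec import Client

    # Plain local paths and remote URLs both resolve to a concrete client class.
    assert Client.get_implementation(Path("./data")).__name__ == "FileClient"
    assert Client.get_implementation("s3://bucket/prefix").__name__ == "ClientS3"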
datachain/client/gcs.py
CHANGED
@@ -30,7 +30,7 @@ class GCSClient(Client):
         if kwargs.pop("anon", False):
             kwargs["token"] = "anon"  # noqa: S105
 
-        return cast(GCSFileSystem, super().create_fs(**kwargs))
+        return cast("GCSFileSystem", super().create_fs(**kwargs))
 
     def url(self, path: str, expires: int = 3600, **kwargs) -> str:
         """
datachain/client/hf.py
CHANGED
@@ -1,25 +1,50 @@
-import
+import functools
 import posixpath
-from typing import Any
-
-from huggingface_hub import HfFileSystem
+from typing import Any
 
 from datachain.lib.file import File
 
 from .fsspec import Client
 
 
+class classproperty:  # noqa: N801
+    def __init__(self, func):
+        self.fget = func
+
+    def __get__(self, instance, owner):
+        return self.fget(owner)
+
+
+@functools.cache
+def get_hf_filesystem_cls():
+    import fsspec
+    from packaging.version import Version, parse
+
+    fsspec_version = parse(fsspec.__version__)
+    minver = Version("2024.12.0")
+
+    if fsspec_version < minver:
+        raise ImportError(
+            f"datachain requires 'fsspec>={minver}' but version "
+            f"{fsspec_version} is installed."
+        )
+
+    from fsspec.implementations.asyn_wrapper import AsyncFileSystemWrapper
+    from huggingface_hub import HfFileSystem
+
+    fs_cls = AsyncFileSystemWrapper.wrap_class(HfFileSystem)
+    # AsyncFileSystemWrapper does not set class properties, so we need to set them back.
+    fs_cls.protocol = HfFileSystem.protocol
+    return fs_cls
+
+
 class HfClient(Client):
-    FS_CLASS = HfFileSystem
     PREFIX = "hf://"
     protocol = "hf"
 
-    @
-    def
-
-        kwargs["token"] = os.environ["HF_TOKEN"]
-
-        return cast(HfFileSystem, super().create_fs(**kwargs))
+    @classproperty
+    def FS_CLASS(cls):  # noqa: N802, N805
+        return get_hf_filesystem_cls()
 
     def info_to_file(self, v: dict[str, Any], path: str) -> File:
         return File(
@@ -31,8 +56,5 @@ class HfClient(Client):
             last_modified=v["last_commit"].date,
         )
 
-    async def ls_dir(self, path):
-        return self.fs.ls(path, detail=True)
-
     def rel_path(self, path):
         return posixpath.relpath(path, self.name)
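The `classproperty` descriptor plus `functools.cache` is what keeps `HfClient.FS_CLASS` cheap to import: `huggingface_hub` is only loaded when the attribute is first read. A self-contained sketch of the same pattern with generic, hypothetical names:

    import functools


    class classproperty:  # descriptor: a property that resolves on the class itself
        def __init__(self, func):
            self.fget = func

        def __get__(self, instance, owner):
            return self.fget(owner)


    @functools.cache  # compute once, reuse on every later access
    def _load_backend():
        return {"loaded": True}  # stand-in for wrapping a filesystem class


    class Lazy:
        @classproperty
        def BACKEND(cls):  # evaluated only when Lazy.BACKEND is first accessed
            return _load_backend()


    print(Lazy.BACKEND)  # {'loaded': True}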
datachain/client/local.py
CHANGED
@@ -67,10 +67,7 @@ class FileClient(Client):
     @classmethod
     def split_url(cls, url: str) -> tuple[str, str]:
         parsed = urlparse(url)
-        if parsed.scheme
-            scheme, rest = url.split(":", 1)
-            url = f"{scheme.lower()}:{rest}"
-        else:
+        if parsed.scheme != "file":
             url = cls.path_to_uri(url)
 
         fill_path = url[len(cls.PREFIX) :]
datachain/client/s3.py
CHANGED
@@ -55,7 +55,7 @@ class ClientS3(Client):
         except NotImplementedError:
             pass
 
-        return cast(S3FileSystem, super().create_fs(**kwargs))
+        return cast("S3FileSystem", super().create_fs(**kwargs))
 
     def url(self, path: str, expires: int = 3600, **kwargs) -> str:
         """
datachain/data_storage/metastore.py
CHANGED
@@ -119,6 +119,8 @@ class AbstractMetastore(ABC, Serializable):
         query_script: str = "",
         schema: Optional[dict[str, Any]] = None,
         ignore_if_exists: bool = False,
+        description: Optional[str] = None,
+        labels: Optional[list[str]] = None,
     ) -> DatasetRecord:
         """Creates new dataset."""
 
@@ -518,6 +520,8 @@ class AbstractDBMetastore(AbstractMetastore):
         query_script: str = "",
         schema: Optional[dict[str, Any]] = None,
         ignore_if_exists: bool = False,
+        description: Optional[str] = None,
+        labels: Optional[list[str]] = None,
         **kwargs,  # TODO registered = True / False
     ) -> DatasetRecord:
         """Creates new dataset."""
@@ -533,6 +537,8 @@
             sources="\n".join(sources) if sources else "",
             query_script=query_script,
             schema=json.dumps(schema or {}),
+            description=description,
+            labels=json.dumps(labels or []),
         )
         if ignore_if_exists and hasattr(query, "on_conflict_do_nothing"):
             # SQLite and PostgreSQL both support 'on_conflict_do_nothing',
datachain/data_storage/warehouse.py
CHANGED
@@ -39,13 +39,6 @@ if TYPE_CHECKING:
     from datachain.data_storage.schema import DataTable
     from datachain.lib.file import File
 
-try:
-    import numpy as np
-
-    numpy_imported = True
-except ImportError:
-    numpy_imported = False
-
 
 logger = logging.getLogger("datachain")
 
@@ -96,7 +89,9 @@ class AbstractWarehouse(ABC, Serializable):
         If value is a list or some other iterable, it tries to convert sub elements
         as well
         """
-
+        import numpy as np
+
+        if isinstance(val, (np.ndarray, np.generic)):
             val = val.tolist()
 
         # Optimization: Precompute all the column type variables.
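The warehouse change drops the optional-numpy guard: `numpy` is now imported unconditionally at the call site and values are converted before writing. For reference, the conversion it relies on:

    import numpy as np

    # Both np.ndarray and np.generic expose .tolist(), yielding plain Python values.
    print(np.array([[1, 2], [3, 4]]).tolist())  # [[1, 2], [3, 4]]
    print(np.float32(0.5).tolist())             # 0.5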
datachain/dataset.py
CHANGED
@@ -302,6 +302,7 @@ class DatasetListVersion:
         size: Optional[int],
         query_script: str = "",
         job_id: Optional[str] = None,
+        **kwargs,
     ):
         return cls(
             id,
@@ -648,6 +649,13 @@ class DatasetListRecord:
     def has_version_with_uuid(self, uuid: str) -> bool:
         return any(v.uuid == uuid for v in self.versions)
 
+    @classmethod
+    def from_dict(cls, d: dict[str, Any]) -> "DatasetListRecord":
+        versions = [DatasetListVersion.parse(**v) for v in d.get("versions", [])]
+        kwargs = {f.name: d[f.name] for f in fields(cls) if f.name in d}
+        kwargs["versions"] = versions
+        return cls(**kwargs)
+
 
 class RowDict(dict):
     pass
datachain/error.py
CHANGED
@@ -1,15 +1,3 @@
-import botocore.errorfactory
-import botocore.exceptions
-import gcsfs.retry
-
-REMOTE_ERRORS = (
-    gcsfs.retry.HttpError,  # GCS
-    OSError,  # GCS
-    botocore.exceptions.BotoCoreError,  # S3
-    ValueError,  # Azure
-)
-
-
 class DataChainError(RuntimeError):
     pass
 
datachain/fs/utils.py
ADDED
@@ -0,0 +1,30 @@
+from typing import TYPE_CHECKING
+
+from fsspec.implementations.local import LocalFileSystem
+
+if TYPE_CHECKING:
+    from fsspec import AbstractFileSystem
+
+
+def _isdir(fs: "AbstractFileSystem", path: str) -> bool:
+    info = fs.info(path)
+    return info["type"] == "directory" or (
+        info["size"] == 0 and info["type"] == "file" and info["name"].endswith("/")
+    )
+
+
+def isfile(fs: "AbstractFileSystem", path: str) -> bool:
+    """
+    Returns True if uri points to a file.
+
+    Supports special directories on object storages, e.g.:
+    Google creates a zero byte file with the same name as the directory with a trailing
+    slash at the end.
+    """
+    if isinstance(fs, LocalFileSystem):
+        return fs.isfile(path)
+
+    try:
+        return not _isdir(fs, path)
+    except FileNotFoundError:
+        return False
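A short usage sketch for the new helper; an in-memory fsspec filesystem stands in for an object store here, and the paths are illustrative:

    import fsspec

    from datachain.fs.utils import isfile

    fs = fsspec.filesystem("memory")
    fs.makedirs("/data", exist_ok=True)
    fs.pipe_file("/data/file.txt", b"hello")

    print(isfile(fs, "/data/file.txt"))  # True: info()["type"] == "file"
    print(isfile(fs, "/data"))           # False: resolves to a directory
    print(isfile(fs, "/data/missing"))   # False: FileNotFoundError is swallowed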
datachain/func/__init__.py
CHANGED
@@ -18,6 +18,7 @@ from .aggregate import (
 from .array import contains, cosine_distance, euclidean_distance, length, sip_hash_64
 from .conditional import and_, case, greatest, ifelse, isnone, least, or_
 from .numeric import bit_and, bit_hamming_distance, bit_or, bit_xor, int_hash_64
+from .path import file_ext, file_stem, name, parent
 from .random import rand
 from .string import byte_hamming_distance
 from .window import window
@@ -40,6 +41,8 @@ __all__ = [
     "count",
     "dense_rank",
     "euclidean_distance",
+    "file_ext",
+    "file_stem",
     "first",
     "greatest",
     "ifelse",
@@ -50,7 +53,9 @@ __all__ = [
     "literal",
     "max",
     "min",
+    "name",
     "or_",
+    "parent",
     "path",
     "rand",
     "random",
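The re-exported path helpers operate on string path columns. A hedged sketch of combining them in a chain, assuming string column references are accepted here as elsewhere in `datachain.func`; the storage URI and output column names are illustrative, and `file.path` is the standard `File` signal:

    from datachain import DataChain
    from datachain.func import file_ext, file_stem, name, parent

    chain = DataChain.from_storage("gs://bucket/images/").mutate(
        fname=name("file.path"),        # file name component of the path
        ext=file_ext("file.path"),      # file extension
        stem=file_stem("file.path"),    # file name without the extension
        dirname=parent("file.path"),    # parent directory of the path
    )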
datachain/func/func.py
CHANGED
@@ -3,7 +3,6 @@ from collections.abc import Sequence
 from typing import TYPE_CHECKING, Any, Callable, Optional, Union
 
 from sqlalchemy import BindParameter, Case, ColumnElement, Integer, cast, desc
-from sqlalchemy.ext.hybrid import Comparator
 from sqlalchemy.sql import func as sa_func
 
 from datachain.lib.convert.python_to_sql import python_to_sql
@@ -75,6 +74,8 @@ class Func(Function):
 
     @property
     def _db_cols(self) -> Sequence[ColT]:
+        from sqlalchemy.ext.hybrid import Comparator
+
         return (
             [
                 col