datachain 0.8.7__py3-none-any.whl → 0.8.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of datachain might be problematic.
- datachain/cache.py +3 -3
- datachain/catalog/catalog.py +1 -1
- datachain/cli/__init__.py +12 -4
- datachain/cli/commands/datasets.py +2 -3
- datachain/cli/parser/__init__.py +51 -69
- datachain/cli/parser/job.py +20 -25
- datachain/cli/parser/studio.py +22 -46
- datachain/cli/parser/utils.py +1 -1
- datachain/client/azure.py +1 -1
- datachain/client/fsspec.py +1 -1
- datachain/client/gcs.py +1 -1
- datachain/client/local.py +1 -1
- datachain/client/s3.py +1 -1
- datachain/data_storage/sqlite.py +1 -1
- datachain/data_storage/warehouse.py +1 -1
- datachain/lib/arrow.py +2 -2
- datachain/lib/convert/unflatten.py +1 -2
- datachain/lib/dc.py +38 -11
- datachain/lib/file.py +27 -4
- datachain/lib/hf.py +1 -1
- datachain/lib/listing.py +4 -4
- datachain/lib/pytorch.py +3 -1
- datachain/lib/udf.py +56 -20
- datachain/listing.py +1 -1
- datachain/model/bbox.py +9 -9
- datachain/model/pose.py +9 -9
- datachain/model/segment.py +6 -6
- datachain/progress.py +0 -133
- datachain/query/dataset.py +19 -12
- datachain/studio.py +15 -9
- {datachain-0.8.7.dist-info → datachain-0.8.9.dist-info}/METADATA +4 -3
- {datachain-0.8.7.dist-info → datachain-0.8.9.dist-info}/RECORD +36 -36
- {datachain-0.8.7.dist-info → datachain-0.8.9.dist-info}/LICENSE +0 -0
- {datachain-0.8.7.dist-info → datachain-0.8.9.dist-info}/WHEEL +0 -0
- {datachain-0.8.7.dist-info → datachain-0.8.9.dist-info}/entry_points.txt +0 -0
- {datachain-0.8.7.dist-info → datachain-0.8.9.dist-info}/top_level.txt +0 -0
datachain/cli/parser/studio.py
CHANGED
@@ -1,10 +1,8 @@
 def add_studio_parser(subparsers, parent_parser) -> None:
-    studio_help = "
+    studio_help = "Manage Studio authentication"
     studio_description = (
-        "
-        "
-        "DataChain will utilize it for seamlessly sharing datasets\n"
-        "and using Studio features from CLI"
+        "Manage authentication and settings for Studio. "
+        "Configure tokens for sharing datasets and using Studio features."
     )

     studio_parser = subparsers.add_parser(
@@ -15,14 +13,13 @@ def add_studio_parser(subparsers, parent_parser) -> None:
     )
     studio_subparser = studio_parser.add_subparsers(
         dest="cmd",
-        help="Use `
-        required=True,
+        help="Use `datachain studio CMD --help` to display command-specific help",
     )

-    studio_login_help = "Authenticate
+    studio_login_help = "Authenticate with Studio"
     studio_login_description = (
-        "
-        "
+        "Authenticate with Studio using default scopes. "
+        "A random name will be assigned as the token name if not specified."
     )
     login_parser = studio_subparser.add_parser(
         "login",
@@ -36,14 +33,14 @@ def add_studio_parser(subparsers, parent_parser) -> None:
         "--hostname",
         action="store",
         default=None,
-        help="
+        help="Hostname of the Studio instance",
     )
     login_parser.add_argument(
         "-s",
         "--scopes",
         action="store",
         default=None,
-        help="
+        help="Authentication token scopes",
     )

     login_parser.add_argument(
@@ -51,21 +48,20 @@ def add_studio_parser(subparsers, parent_parser) -> None:
         "--name",
         action="store",
         default=None,
-        help="
-        "identify token shown in Studio profile.",
+        help="Authentication token name (shown in Studio profile)",
     )

     login_parser.add_argument(
         "--no-open",
         action="store_true",
         default=False,
-        help="Use
-        "You will be presented with user code to enter in browser.\n"
-        "DataChain will also use this if it cannot launch browser on your behalf.",
+        help="Use code-based authentication without browser",
     )

-    studio_logout_help = "
-    studio_logout_description =
+    studio_logout_help = "Log out from Studio"
+    studio_logout_description = (
+        "Remove the Studio authentication token from global config."
+    )

     studio_subparser.add_parser(
         "logout",
@@ -74,10 +70,8 @@ def add_studio_parser(subparsers, parent_parser) -> None:
         help=studio_logout_help,
     )

-    studio_team_help = "Set
-    studio_team_description =
-        "Set the default team for DataChain to use when interacting with Studio."
-    )
+    studio_team_help = "Set default team for Studio operations"
+    studio_team_description = "Set the default team for Studio operations."

     team_parser = studio_subparser.add_parser(
         "team",
@@ -88,39 +82,21 @@ def add_studio_parser(subparsers, parent_parser) -> None:
     team_parser.add_argument(
         "team_name",
         action="store",
-        help="
+        help="Name of the team to set as default",
     )
     team_parser.add_argument(
         "--global",
         action="store_true",
         default=False,
-        help="Set
+        help="Set team globally for all projects",
     )

-    studio_token_help = "View
+    studio_token_help = "View Studio authentication token"  # noqa: S105
+    studio_token_description = "Display the current authentication token for Studio."  # noqa: S105

     studio_subparser.add_parser(
         "token",
         parents=[parent_parser],
-        description=
+        description=studio_token_description,
         help=studio_token_help,
     )
-
-    studio_ls_dataset_help = "List the available datasets from Studio"
-    studio_ls_dataset_description = (
-        "This command lists all the datasets available in Studio.\n"
-        "It will show the dataset name and the number of versions available."
-    )
-
-    ls_dataset_parser = studio_subparser.add_parser(
-        "dataset",
-        parents=[parent_parser],
-        description=studio_ls_dataset_description,
-        help=studio_ls_dataset_help,
-    )
-    ls_dataset_parser.add_argument(
-        "--team",
-        action="store",
-        default=None,
-        help="The team to list datasets for. By default, it will use team from config.",
-    )
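The reworked parser keeps the login, logout, team, and token subcommands and drops the Studio dataset listing subcommand. A minimal sketch of how `add_studio_parser` plugs into an argparse-based entry point follows; the top-level parser setup and the bare `parent_parser` are assumptions for illustration, not the actual datachain CLI wiring.

    import argparse

    from datachain.cli.parser.studio import add_studio_parser

    # Assumed stand-ins for the real CLI objects.
    parent_parser = argparse.ArgumentParser(add_help=False)
    parser = argparse.ArgumentParser(prog="datachain")
    subparsers = parser.add_subparsers(dest="command")

    add_studio_parser(subparsers, parent_parser)

    # `required=True` was dropped from the studio subparsers, so both of these parse:
    args = parser.parse_args(["studio", "login", "--no-open"])
    print(args.cmd, args.no_open)  # -> login True
    args = parser.parse_args(["studio"])
    print(args.cmd)  # -> None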
datachain/cli/parser/utils.py
CHANGED
datachain/client/azure.py
CHANGED
datachain/client/fsspec.py
CHANGED
@@ -23,7 +23,7 @@ from botocore.exceptions import ClientError
 from dvc_objects.fs.system import reflink
 from fsspec.asyn import get_loop, sync
 from fsspec.callbacks import DEFAULT_CALLBACK, Callback
-from tqdm import tqdm
+from tqdm.auto import tqdm

 from datachain.cache import DataChainCache
 from datachain.client.fileslice import FileWrapper
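Several modules in this release switch the import from `tqdm` to `tqdm.auto`, which selects the notebook-friendly progress bar when running under Jupyter and falls back to the plain console bar otherwise. A small standalone illustration of the behaviour being relied on:

    from time import sleep

    from tqdm.auto import tqdm  # picks the right frontend automatically

    for _ in tqdm(range(5), desc="download", unit="file"):
        sleep(0.1)  # stand-in for real work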
datachain/client/gcs.py
CHANGED
datachain/client/local.py
CHANGED
@@ -38,7 +38,7 @@ class FileClient(Client):
     def get_uri(cls, name: str) -> "StorageURI":
         from datachain.dataset import StorageURI

-        return StorageURI(f
+        return StorageURI(f"{cls.PREFIX}/{name.removeprefix('/')}")

     @classmethod
     def ls_buckets(cls, **kwargs):
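The new `get_uri` line builds the URI with `str.removeprefix("/")`, which strips at most one leading slash (unlike `lstrip("/")`, which would drop all of them). A quick illustration; the `file://` value is an assumption about what `FileClient.PREFIX` holds:

    PREFIX = "file://"  # assumed value of FileClient.PREFIX

    name = "/home/user/data"
    print(f"{PREFIX}/{name.removeprefix('/')}")            # file:///home/user/data
    print(f"{PREFIX}/{'//share/data'.removeprefix('/')}")  # only one leading slash removed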
datachain/client/s3.py
CHANGED
datachain/data_storage/sqlite.py
CHANGED
@@ -21,7 +21,7 @@ from sqlalchemy.schema import CreateIndex, CreateTable, DropTable
 from sqlalchemy.sql import func
 from sqlalchemy.sql.expression import bindparam, cast
 from sqlalchemy.sql.selectable import Select
-from tqdm import tqdm
+from tqdm.auto import tqdm

 import datachain.sql.sqlite
 from datachain.data_storage import AbstractDBMetastore, AbstractWarehouse
datachain/data_storage/warehouse.py
CHANGED
@@ -14,7 +14,7 @@ import sqlalchemy as sa
 from sqlalchemy import Table, case, select
 from sqlalchemy.sql import func
 from sqlalchemy.sql.expression import true
-from tqdm import tqdm
+from tqdm.auto import tqdm

 from datachain.client import Client
 from datachain.data_storage.schema import convert_rows_custom_column_types
datachain/lib/arrow.py
CHANGED
@@ -7,7 +7,7 @@ import orjson
 import pyarrow as pa
 from fsspec.core import split_protocol
 from pyarrow.dataset import CsvFileFormat, dataset
-from tqdm import tqdm
+from tqdm.auto import tqdm

 from datachain.lib.data_model import dict_to_data_model
 from datachain.lib.file import ArrowRow, File
@@ -33,7 +33,7 @@ class ReferenceFileSystem(fsspec.implementations.reference.ReferenceFileSystem):
         # reads the whole file in-memory.
         (uri,) = self.references[path]
         protocol, _ = split_protocol(uri)
-        return self.fss[protocol].
+        return self.fss[protocol].open(uri, mode, *args, **kwargs)


 class ArrowGenerator(Generator):
datachain/lib/convert/unflatten.py
CHANGED
@@ -35,8 +35,7 @@ def unflatten_to_json_pos(
     def _normalize(name: str) -> str:
         if DEFAULT_DELIMITER in name:
             raise RuntimeError(
-                f"variable '{name}' cannot be used "
-                f"because it contains {DEFAULT_DELIMITER}"
+                f"variable '{name}' cannot be used because it contains {DEFAULT_DELIMITER}"
             )
         return _to_snake_case(name)

datachain/lib/dc.py
CHANGED
@@ -11,6 +11,7 @@ from typing import (
     BinaryIO,
     Callable,
     ClassVar,
+    Literal,
     Optional,
     TypeVar,
     Union,
@@ -1276,7 +1277,12 @@ class DataChain:
             yield ret[0] if len(cols) == 1 else tuple(ret)

     def to_pytorch(
-        self,
+        self,
+        transform=None,
+        tokenizer=None,
+        tokenizer_kwargs=None,
+        num_samples=0,
+        remove_prefetched: bool = False,
     ):
         """Convert to pytorch dataset format.

@@ -1286,6 +1292,7 @@ class DataChain:
             tokenizer_kwargs (dict): Additional kwargs to pass when calling tokenizer.
             num_samples (int): Number of random samples to draw for each epoch.
                 This argument is ignored if `num_samples=0` (the default).
+            remove_prefetched (bool): Whether to remove prefetched files after reading.

         Example:
             ```py
@@ -1312,6 +1319,7 @@ class DataChain:
             tokenizer_kwargs=tokenizer_kwargs,
             num_samples=num_samples,
             dc_settings=chain._settings,
+            remove_prefetched=remove_prefetched,
         )

     def remove_file_signals(self) -> "Self":  # noqa: D102
@@ -1330,19 +1338,27 @@ class DataChain:

         Parameters:
             right_ds: Chain to join with.
-            on: Predicate
-
-
-
-
+            on: Predicate ("column.name", C("column.name"), or Func) or list of
+                Predicates to join on. If both chains have the same predicates then
+                this predicate is enough for the join. Otherwise, `right_on` parameter
+                has to specify the predicates for the other chain.
+            right_on: Optional predicate or list of Predicates for the `right_ds`
+                to join.
             inner (bool): Whether to run inner join or outer join.
-            rname (str):
+            rname (str): Name prefix for conflicting signal names.

-
+        Examples:
             ```py
             meta = meta_emd.merge(meta_pq, on=(C.name, C.emd__index),
                                   right_on=(C.name, C.pq__index))
             ```
+
+            ```py
+            imgs.merge(captions,
+                       on=func.path.file_stem(imgs.c("file.path")),
+                       right_on=func.path.file_stem(captions.c("file.path"))
+            ```
+            )
         """
         if on is None:
             raise DatasetMergeError(["None"], None, "'on' must be specified")
@@ -2407,11 +2423,22 @@
     def export_files(
         self,
         output: str,
-        signal="file",
+        signal: str = "file",
         placement: FileExportPlacement = "fullpath",
         use_cache: bool = True,
+        link_type: Literal["copy", "symlink"] = "copy",
     ) -> None:
-        """
+        """Export files from a specified signal to a directory.
+
+        Args:
+            output: Path to the target directory for exporting files.
+            signal: Name of the signal to export files from.
+            placement: The method to use for naming exported files.
+                The possible values are: "filename", "etag", "fullpath", and "checksum".
+            use_cache: If `True`, cache the files before exporting.
+            link_type: Method to use for exporting files.
+                Falls back to `'copy'` if symlinking fails.
+        """
         if placement == "filename" and (
             self._query.distinct(pathfunc.name(C(f"{signal}__path"))).count()
             != self._query.count()
@@ -2419,7 +2446,7 @@
             raise ValueError("Files with the same name found")

         for file in self.collect(signal):
-            file.export(output, placement, use_cache)  # type: ignore[union-attr]
+            file.export(output, placement, use_cache, link_type=link_type)  # type: ignore[union-attr]

     def shuffle(self) -> "Self":
         """Shuffle the rows of the chain deterministically."""
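Taken together, the dc.py changes expand the `merge` docstring, add `remove_prefetched` to `to_pytorch`, and add a `link_type` option to `export_files`. A hedged usage sketch follows; the storage URIs are placeholders and the import paths for `DataChain` and `func` are assumed to match the documented examples.

    from datachain import func
    from datachain.lib.dc import DataChain

    imgs = DataChain.from_storage("s3://example-bucket/images/")        # placeholder URI
    captions = DataChain.from_storage("s3://example-bucket/captions/")  # placeholder URI

    # Join on the file stem, mirroring the second docstring example above.
    pairs = imgs.merge(
        captions,
        on=func.path.file_stem(imgs.c("file.path")),
        right_on=func.path.file_stem(captions.c("file.path")),
    )

    # Export the matched images as symlinks; export_files falls back to
    # copying when symlinking fails.
    pairs.export_files("exported/", signal="file", link_type="symlink")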
datachain/lib/file.py
CHANGED
@@ -1,3 +1,4 @@
+import errno
 import hashlib
 import io
 import json
@@ -76,18 +77,18 @@ class TarVFile(VFile):
     def open(cls, file: "File", location: list[dict]):
         """Stream file from tar archive based on location in archive."""
         if len(location) > 1:
-            VFileError(file, "multiple 'location's are not supported yet")
+            raise VFileError(file, "multiple 'location's are not supported yet")

         loc = location[0]

         if (offset := loc.get("offset", None)) is None:
-            VFileError(file, "'offset' is not specified")
+            raise VFileError(file, "'offset' is not specified")

         if (size := loc.get("size", None)) is None:
-            VFileError(file, "'size' is not specified")
+            raise VFileError(file, "'size' is not specified")

         if (parent := loc.get("parent", None)) is None:
-            VFileError(file, "'parent' is not specified")
+            raise VFileError(file, "'parent' is not specified")

         tar_file = File(**parent)
         tar_file._set_stream(file._catalog)
@@ -236,11 +237,26 @@ class File(DataModel):
         with open(destination, mode="wb") as f:
             f.write(self.read())

+    def _symlink_to(self, destination: str):
+        if self.location:
+            raise OSError(errno.ENOTSUP, "Symlinking virtual file is not supported")
+
+        if self._caching_enabled:
+            self.ensure_cached()
+            source = self.get_local_path()
+            assert source, "File was not cached"
+        elif self.source.startswith("file://"):
+            source = self.get_path()
+        else:
+            raise OSError(errno.EXDEV, "can't link across filesystems")
+        return os.symlink(source, destination)
+
     def export(
         self,
         output: str,
         placement: ExportPlacement = "fullpath",
         use_cache: bool = True,
+        link_type: Literal["copy", "symlink"] = "copy",
     ) -> None:
         """Export file to new location."""
         if use_cache:
@@ -249,6 +265,13 @@ class File(DataModel):
         dst_dir = os.path.dirname(dst)
         os.makedirs(dst_dir, exist_ok=True)

+        if link_type == "symlink":
+            try:
+                return self._symlink_to(dst)
+            except OSError as exc:
+                if exc.errno not in (errno.ENOTSUP, errno.EXDEV, errno.ENOSYS):
+                    raise
+
         self.save(dst)

     def _set_stream(
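From a caller's perspective, `link_type="symlink"` makes `export` try the new `_symlink_to` helper first and silently fall back to a copy when the attempt fails with ENOTSUP, EXDEV, or ENOSYS (virtual files, cross-filesystem sources, or platforms without symlinks). A small sketch; the `files` iterable is assumed to yield datachain `File` objects, e.g. from a chain's "file" signal.

    def export_with_symlinks(files, output_dir: str) -> None:
        # `files` is assumed to be an iterable of datachain File objects,
        # e.g. chain.collect("file").
        for file in files:
            file.export(output_dir, placement="fullpath", link_type="symlink")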
datachain/lib/hf.py
CHANGED
@@ -29,7 +29,7 @@ from io import BytesIO
 from typing import TYPE_CHECKING, Any, Union

 import PIL
-from tqdm import tqdm
+from tqdm.auto import tqdm

 from datachain.lib.arrow import arrow_type_mapper
 from datachain.lib.data_model import DataModel, DataType, dict_to_data_model
datachain/lib/listing.py
CHANGED
@@ -113,14 +113,14 @@ def parse_listing_uri(uri: str, cache, client_config) -> tuple[Optional[str], st
     telemetry.log_param("client", client.PREFIX)

     if not uri.endswith("/") and _isfile(client, uri):
-        return None, f
+        return None, f"{storage_uri}/{path.lstrip('/')}", path
     if uses_glob(path):
         lst_uri_path = posixpath.dirname(path)
     else:
-        storage_uri, path = Client.parse_url(f
+        storage_uri, path = Client.parse_url(f"{uri.rstrip('/')}/")
         lst_uri_path = path

-    lst_uri = f
+    lst_uri = f"{storage_uri}/{lst_uri_path.lstrip('/')}"
     ds_name = (
         f"{LISTING_PREFIX}{storage_uri}/{posixpath.join(lst_uri_path, '').lstrip('/')}"
     )
@@ -180,7 +180,7 @@ def get_listing(
     # for local file system we need to fix listing path / prefix
     # if we are reusing existing listing
     if isinstance(client, FileClient) and listing and listing.name != ds_name:
-        list_path = f
+        list_path = f"{ds_name.strip('/').removeprefix(listing.name)}/{list_path}"

     ds_name = listing.name if listing else ds_name

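The reconstructed f-strings all follow the same normalisation idiom: trim slashes from one side before joining so the final URI has exactly one separator. A standalone illustration with made-up values:

    storage_uri = "s3://example-bucket"   # made-up values for illustration
    lst_uri_path = "/photos/2024"

    lst_uri = f"{storage_uri}/{lst_uri_path.lstrip('/')}"
    print(lst_uri)                        # s3://example-bucket/photos/2024

    uri = "s3://example-bucket/photos/2024///"
    print(f"{uri.rstrip('/')}/")          # exactly one trailing slash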
datachain/lib/pytorch.py
CHANGED
@@ -50,6 +50,7 @@ class PytorchDataset(IterableDataset):
         tokenizer_kwargs: Optional[dict[str, Any]] = None,
         num_samples: int = 0,
         dc_settings: Optional[Settings] = None,
+        remove_prefetched: bool = False,
     ):
         """
         Pytorch IterableDataset that streams DataChain datasets.
@@ -84,6 +85,7 @@ class PytorchDataset(IterableDataset):

         self._cache = catalog.cache
         self._prefetch_cache: Optional[Cache] = None
+        self._remove_prefetched = remove_prefetched
         if prefetch and not self.cache:
             tmp_dir = catalog.cache.tmp_dir
             assert tmp_dir
@@ -147,7 +149,7 @@ class PytorchDataset(IterableDataset):
             rows,
             self.prefetch,
             download_cb=download_cb,
-
+            remove_prefetched=self._remove_prefetched,
         )

         with download_cb, closing(rows):
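`PytorchDataset` now accepts `remove_prefetched` and forwards it to the row prefetcher, so each prefetched file can be deleted once the iterator has yielded it. A hedged sketch of consuming such a dataset through a torch `DataLoader`; the dataset name and loader settings are placeholders.

    from torch.utils.data import DataLoader

    from datachain.lib.dc import DataChain

    chain = DataChain.from_dataset("my-dataset")  # placeholder dataset name
    loader = DataLoader(
        chain.to_pytorch(remove_prefetched=True),  # prefetched files removed after use
        batch_size=16,
        num_workers=2,
    )
    for batch in loader:
        ...  # training / inference step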
datachain/lib/udf.py
CHANGED
@@ -16,6 +16,7 @@ from datachain.lib.convert.flatten import flatten
 from datachain.lib.data_model import DataValue
 from datachain.lib.file import File
 from datachain.lib.utils import AbstractUDF, DataChainError, DataChainParamsError
+from datachain.progress import CombinedDownloadCallback
 from datachain.query.batch import (
     Batch,
     BatchingStrategy,
@@ -301,20 +302,42 @@ async def _prefetch_input(
     return row


+def _remove_prefetched(row: T) -> None:
+    for obj in row:
+        if isinstance(obj, File):
+            catalog = obj._catalog
+            assert catalog is not None
+            try:
+                catalog.cache.remove(obj)
+            except Exception as e:  # noqa: BLE001
+                print(f"Failed to remove prefetched item {obj.name!r}: {e!s}")
+
+
 def _prefetch_inputs(
     prepared_inputs: "Iterable[T]",
     prefetch: int = 0,
     download_cb: Optional["Callback"] = None,
-    after_prefetch:
+    after_prefetch: Optional[Callable[[], None]] = None,
+    remove_prefetched: bool = False,
 ) -> "abc.Generator[T, None, None]":
-    if prefetch
-
-
-
-
-
-
-
+    if not prefetch:
+        yield from prepared_inputs
+        return
+
+    if after_prefetch is None:
+        after_prefetch = noop
+        if isinstance(download_cb, CombinedDownloadCallback):
+            after_prefetch = download_cb.increment_file_count
+
+    f = partial(_prefetch_input, download_cb=download_cb, after_prefetch=after_prefetch)
+    mapper = AsyncMapper(f, prepared_inputs, workers=prefetch)
+    with closing(mapper.iterate()) as row_iter:
+        for row in row_iter:
+            try:
+                yield row  # type: ignore[misc]
+            finally:
+                if remove_prefetched:
+                    _remove_prefetched(row)


 def _get_cache(
@@ -351,7 +374,13 @@ class Mapper(UDFBase):
         )

         prepared_inputs = _prepare_rows(udf_inputs)
-        prepared_inputs = _prefetch_inputs(
+        prepared_inputs = _prefetch_inputs(
+            prepared_inputs,
+            self.prefetch,
+            download_cb=download_cb,
+            remove_prefetched=bool(self.prefetch) and not cache,
+        )
+
         with closing(prepared_inputs):
             for id_, *udf_args in prepared_inputs:
                 result_objs = self.process_safe(udf_args)
@@ -391,9 +420,9 @@ class BatchMapper(UDFBase):
                 )
                 result_objs = list(self.process_safe(udf_args))
                 n_objs = len(result_objs)
-                assert (
-                    n_objs
-                )
+                assert n_objs == n_rows, (
+                    f"{self.name} returns {n_objs} rows, but {n_rows} were expected"
+                )
                 udf_outputs = (self._flatten_row(row) for row in result_objs)
                 output = [
                     {"sys__id": row_id} | dict(zip(self.signal_names, signals))
@@ -429,15 +458,22 @@ class Generator(UDFBase):
                 row, udf_fields, catalog, cache, download_cb
             )

+        def _process_row(row):
+            with safe_closing(self.process_safe(row)) as result_objs:
+                for result_obj in result_objs:
+                    udf_output = self._flatten_row(result_obj)
+                    yield dict(zip(self.signal_names, udf_output))
+
         prepared_inputs = _prepare_rows(udf_inputs)
-        prepared_inputs = _prefetch_inputs(
+        prepared_inputs = _prefetch_inputs(
+            prepared_inputs,
+            self.prefetch,
+            download_cb=download_cb,
+            remove_prefetched=bool(self.prefetch) and not cache,
+        )
         with closing(prepared_inputs):
-            for row in prepared_inputs:
-
-                udf_outputs = (self._flatten_row(row) for row in result_objs)
-                output = (dict(zip(self.signal_names, row)) for row in udf_outputs)
-                processed_cb.relative_update(1)
-                yield output
+            for row in processed_cb.wrap(prepared_inputs):
+                yield _process_row(row)

         self.teardown()

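The rewritten `_prefetch_inputs` wraps an async mapper and uses try/finally so prefetched inputs can be cleaned up even when the consumer stops iterating early. A simplified, standalone sketch of that generator pattern; it uses a thread pool as a stand-in for datachain's `AsyncMapper` and a plain callback in place of the cache removal.

    from concurrent.futures import ThreadPoolExecutor
    from contextlib import closing


    def prefetch_inputs(items, prefetch=0, cleanup=None):
        if not prefetch:
            yield from items
            return
        with ThreadPoolExecutor(max_workers=prefetch) as pool:
            # pool.map stands in for the async download of each input
            for item in pool.map(lambda x: x, items):
                try:
                    yield item
                finally:
                    if cleanup is not None:
                        cleanup(item)  # e.g. drop the prefetched file from cache


    with closing(prefetch_inputs(range(3), prefetch=2, cleanup=print)) as rows:
        for row in rows:
            pass  # each row is cleaned up right after it is consumed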
datachain/listing.py
CHANGED
@@ -7,7 +7,7 @@ from typing import TYPE_CHECKING, Optional

 from sqlalchemy import Column
 from sqlalchemy.sql import func
-from tqdm import tqdm
+from tqdm.auto import tqdm

 from datachain.node import DirType, Node, NodeWithPath
 from datachain.sql.functions import path as pathfunc
datachain/model/bbox.py
CHANGED
@@ -22,9 +22,9 @@ class BBox(DataModel):
     @staticmethod
     def from_list(coords: list[float], title: str = "") -> "BBox":
         assert len(coords) == 4, "Bounding box must be a list of 4 coordinates."
-        assert all(
-
-        )
+        assert all(isinstance(value, (int, float)) for value in coords), (
+            "Bounding box coordinates must be floats or integers."
+        )
         return BBox(
             title=title,
             coords=[round(c) for c in coords],
@@ -64,12 +64,12 @@ class OBBox(DataModel):

     @staticmethod
     def from_list(coords: list[float], title: str = "") -> "OBBox":
-        assert (
-
-        )
-        assert all(
-
-        )
+        assert len(coords) == 8, (
+            "Oriented bounding box must be a list of 8 coordinates."
+        )
+        assert all(isinstance(value, (int, float)) for value in coords), (
+            "Oriented bounding box coordinates must be floats or integers."
+        )
         return OBBox(
             title=title,
             coords=[round(c) for c in coords],