datachain 0.28.0__py3-none-any.whl → 0.28.2__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of datachain might be problematic.

@@ -324,6 +324,7 @@ class DataChain:
  sys: Optional[bool] = None,
  namespace: Optional[str] = None,
  project: Optional[str] = None,
+ batch_rows: Optional[int] = None,
  ) -> "Self":
  """Change settings for chain.

@@ -331,22 +332,24 @@ class DataChain:
  It returns chain, so, it can be chained later with next operation.

  Parameters:
- cache : data caching (default=False)
+ cache : data caching. (default=False)
  parallel : number of thread for processors. True is a special value to
- enable all available CPUs (default=1)
+ enable all available CPUs. (default=1)
  workers : number of distributed workers. Only for Studio mode. (default=1)
- min_task_size : minimum number of tasks (default=1)
- prefetch: number of workers to use for downloading files in advance.
+ min_task_size : minimum number of tasks. (default=1)
+ prefetch : number of workers to use for downloading files in advance.
  This is enabled by default and uses 2 workers.
  To disable prefetching, set it to 0.
- namespace: namespace name.
- project: project name.
+ namespace : namespace name.
+ project : project name.
+ batch_rows : row limit per insert to balance speed and memory usage.
+ (default=2000)

  Example:
  ```py
  chain = (
  chain
- .settings(cache=True, parallel=8)
+ .settings(cache=True, parallel=8, batch_rows=300)
  .map(laion=process_webdataset(spec=WDSLaion), params="file")
  )
  ```
@@ -356,7 +359,14 @@ class DataChain:
  settings = copy.copy(self._settings)
  settings.add(
  Settings(
- cache, parallel, workers, min_task_size, prefetch, namespace, project
+ cache,
+ parallel,
+ workers,
+ min_task_size,
+ prefetch,
+ namespace,
+ project,
+ batch_rows,
  )
  )
  return self._evolve(settings=settings, _sys=sys)
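
The new `batch_rows` setting threads through `settings()` into `Settings`. A minimal usage sketch; the bucket and the mapper below are illustrative, and when the setting is omitted `Settings` falls back to `DEFAULT_CHUNK_ROWS` (2000):

```py
import datachain as dc

chain = (
    dc.read_storage("s3://bucket/images/")              # illustrative source
    .settings(cache=True, parallel=8, batch_rows=300)   # cap rows per warehouse insert
    .map(size=lambda file: file.size, output=int)       # illustrative UDF
)
```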
@@ -711,7 +721,7 @@ class DataChain:

  return self._evolve(
  query=self._query.add_signals(
- udf_obj.to_udf_wrapper(),
+ udf_obj.to_udf_wrapper(self._settings.batch_rows),
  **self._settings.to_dict(),
  ),
  signal_schema=self.signals_schema | udf_obj.output,
@@ -749,7 +759,7 @@ class DataChain:
  udf_obj.prefetch = prefetch
  return self._evolve(
  query=self._query.generate(
- udf_obj.to_udf_wrapper(),
+ udf_obj.to_udf_wrapper(self._settings.batch_rows),
  **self._settings.to_dict(),
  ),
  signal_schema=udf_obj.output,
@@ -885,7 +895,7 @@ class DataChain:
  udf_obj = self._udf_to_obj(Aggregator, func, params, output, signal_map)
  return self._evolve(
  query=self._query.generate(
- udf_obj.to_udf_wrapper(),
+ udf_obj.to_udf_wrapper(self._settings.batch_rows),
  partition_by=processed_partition_by,
  **self._settings.to_dict(),
  ),
@@ -917,11 +927,24 @@ class DataChain:
  )
  chain.save("new_dataset")
  ```
+
+ .. deprecated:: 0.29.0
+ This method is deprecated and will be removed in a future version.
+ Use `agg()` instead, which provides the similar functionality.
  """
+ import warnings
+
+ warnings.warn(
+ "batch_map() is deprecated and will be removed in a future version. "
+ "Use agg() instead, which provides the similar functionality.",
+ DeprecationWarning,
+ stacklevel=2,
+ )
  udf_obj = self._udf_to_obj(BatchMapper, func, params, output, signal_map)
+
  return self._evolve(
  query=self._query.add_signals(
- udf_obj.to_udf_wrapper(batch),
+ udf_obj.to_udf_wrapper(self._settings.batch_rows, batch=batch),
  **self._settings.to_dict(),
  ),
  signal_schema=self.signals_schema | udf_obj.output,
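
Since `batch_map()` now emits a `DeprecationWarning` pointing at `agg()`, here is a sketch of the suggested replacement; the column names, grouping key, and lambda are illustrative and not taken from the diff:

```py
# agg() receives each group's values as sequences and yields aggregated rows.
chain.agg(
    total=lambda size: [sum(size)],   # one output row per partition
    partition_by="category",
    output=int,
)
```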
@@ -2340,7 +2363,7 @@ class DataChain:
  def setup(self, **kwargs) -> "Self":
  """Setup variables to pass to UDF functions.

- Use before running map/gen/agg/batch_map to save an object and pass it as an
+ Use before running map/gen/agg to save an object and pass it as an
  argument to the UDF.

  The value must be a callable (a `lambda: <value>` syntax can be used to quickly
@@ -2419,9 +2442,11 @@ class DataChain:
  ds.to_storage("gs://mybucket", placement="filename")
  ```
  """
+ chain = self.persist()
+ count = chain.count()
+
  if placement == "filename" and (
- self._query.distinct(pathfunc.name(C(f"{signal}__path"))).count()
- != self._query.count()
+ chain._query.distinct(pathfunc.name(C(f"{signal}__path"))).count() != count
  ):
  raise ValueError("Files with the same name found")
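
From the caller's side the duplicate-name check behaves as before; only the chain is persisted first so its row count can be reused. A small sketch (bucket name illustrative):

```py
# Exports each file under its bare filename; raises
# ValueError("Files with the same name found") if two exported names collide.
chain.to_storage("gs://mybucket", placement="filename")
```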

@@ -2433,7 +2458,7 @@ class DataChain:
  unit=" files",
  unit_scale=True,
  unit_divisor=10,
- total=self.count(),
+ total=count,
  leave=False,
  )
  file_exporter = FileExporter(
@@ -2444,7 +2469,10 @@ class DataChain:
  max_threads=num_threads or 1,
  client_config=client_config,
  )
- file_exporter.run(self.to_values(signal), progress_bar)
+ file_exporter.run(
+ (rows[0] for rows in chain.to_iter(signal)),
+ progress_bar,
+ )

  def shuffle(self) -> "Self":
  """Shuffle the rows of the chain deterministically."""
@@ -15,6 +15,8 @@ if TYPE_CHECKING:

  P = ParamSpec("P")

+ READ_RECORDS_BATCH_SIZE = 10000
+

  def read_records(
  to_insert: Optional[Union[dict, Iterable[dict]]],
@@ -41,7 +43,7 @@ def read_records(
  Notes:
  This call blocks until all records are inserted.
  """
- from datachain.query.dataset import INSERT_BATCH_SIZE, adjust_outputs, get_col_types
+ from datachain.query.dataset import adjust_outputs, get_col_types
  from datachain.sql.types import SQLType
  from datachain.utils import batched

@@ -94,7 +96,7 @@ def read_records(
  {c.name: c.type for c in columns if isinstance(c.type, SQLType)},
  )
  records = (adjust_outputs(warehouse, record, col_types) for record in to_insert)
- for chunk in batched(records, INSERT_BATCH_SIZE):
+ for chunk in batched(records, READ_RECORDS_BATCH_SIZE):
  warehouse.insert_rows(table, chunk)
  warehouse.insert_rows_done(table)
  return read_dataset(name=dsr.full_name, session=session, settings=settings)
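
`read_records()` now batches its inserts with a dedicated `READ_RECORDS_BATCH_SIZE` (10,000) instead of the warehouse-wide `INSERT_BATCH_SIZE`. A usage sketch, assuming the helper is exposed as `datachain.read_records` like the other `read_*` entry points and that `schema` is the keyword for column types:

```py
import datachain as dc

records = ({"idx": i, "value": i * i} for i in range(25_000))
# The 25,000 rows above would be inserted as chunks of 10,000, 10,000 and 5,000.
chain = dc.read_records(records, schema={"idx": int, "value": int})
```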
datachain/lib/file.py CHANGED
@@ -23,7 +23,7 @@ from pydantic import Field, field_validator

  from datachain.client.fileslice import FileSlice
  from datachain.lib.data_model import DataModel
- from datachain.lib.utils import DataChainError
+ from datachain.lib.utils import DataChainError, rebase_path
  from datachain.nodes_thread_pool import NodesThreadPool
  from datachain.sql.types import JSON, Boolean, DateTime, Int, String
  from datachain.utils import TIME_ZERO
@@ -634,6 +634,40 @@ class File(DataModel):
  location=self.location,
  )

+ def rebase(
+ self,
+ old_base: str,
+ new_base: str,
+ suffix: str = "",
+ extension: str = "",
+ ) -> str:
+ """
+ Rebase the file's URI from one base directory to another.
+
+ Args:
+ old_base: Base directory to remove from the file's URI
+ new_base: New base directory to prepend
+ suffix: Optional suffix to add before file extension
+ extension: Optional new file extension (without dot)
+
+ Returns:
+ str: Rebased URI with new base directory
+
+ Raises:
+ ValueError: If old_base is not found in the file's URI
+
+ Examples:
+ >>> file = File(source="s3://bucket", path="data/2025-05-27/file.wav")
+ >>> file.rebase("s3://bucket/data", "s3://output-bucket/processed", \
+ extension="mp3")
+ 's3://output-bucket/processed/2025-05-27/file.mp3'
+
+ >>> file.rebase("data/audio", "/local/output", suffix="_ch1",
+ extension="npy")
+ '/local/output/file_ch1.npy'
+ """
+ return rebase_path(self.get_uri(), old_base, new_base, suffix, extension)
+
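
A sketch of the new `File.rebase()` used inside a chain; the bucket names and the `out_uri` column are illustrative, not part of the diff:

```py
import datachain as dc

def output_uri(file: dc.File) -> str:
    # Hypothetical layout: where a converted copy of this file would live.
    return file.rebase("s3://bucket/data", "s3://out-bucket/processed", extension="mp3")

chain = dc.read_storage("s3://bucket/data/").map(out_uri=output_uri)
```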

  def resolve(file: File) -> File:
  """
@@ -1219,6 +1253,24 @@ class Audio(DataModel):
  codec: str = Field(default="")
  bit_rate: int = Field(default=-1)

+ @staticmethod
+ def get_channel_name(num_channels: int, channel_idx: int) -> str:
+ """Map channel index to meaningful name based on common audio formats"""
+ channel_mappings = {
+ 1: ["Mono"],
+ 2: ["Left", "Right"],
+ 4: ["W", "X", "Y", "Z"], # First-order Ambisonics
+ 6: ["FL", "FR", "FC", "LFE", "BL", "BR"], # 5.1 surround
+ 8: ["FL", "FR", "FC", "LFE", "BL", "BR", "SL", "SR"], # 7.1 surround
+ }
+
+ if num_channels in channel_mappings:
+ channels = channel_mappings[num_channels]
+ if 0 <= channel_idx < len(channels):
+ return channels[channel_idx]
+
+ return f"Ch{channel_idx + 1}"
+
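
Based on the mapping table above, the new helper resolves channel names as follows:

```py
from datachain.lib.file import Audio

Audio.get_channel_name(2, 0)   # 'Left'
Audio.get_channel_name(6, 3)   # 'LFE'
Audio.get_channel_name(3, 2)   # 'Ch3' (fallback for layouts without a mapping)
```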

  class ArrowRow(DataModel):
  """`DataModel` for reading row from Arrow-supported file."""
datachain/lib/settings.py CHANGED
@@ -1,4 +1,5 @@
  from datachain.lib.utils import DataChainParamsError
+ from datachain.utils import DEFAULT_CHUNK_ROWS


  class SettingsError(DataChainParamsError):
@@ -16,6 +17,7 @@ class Settings:
  prefetch=None,
  namespace=None,
  project=None,
+ batch_rows=None,
  ):
  self._cache = cache
  self.parallel = parallel
@@ -24,6 +26,7 @@ class Settings:
  self.prefetch = prefetch
  self.namespace = namespace
  self.project = project
+ self._chunk_rows = batch_rows

  if not isinstance(cache, bool) and cache is not None:
  raise SettingsError(
@@ -53,6 +56,18 @@ class Settings:
  f", {min_task_size.__class__.__name__} was given"
  )

+ if batch_rows is not None and not isinstance(batch_rows, int):
+ raise SettingsError(
+ "'batch_rows' argument must be int or None"
+ f", {batch_rows.__class__.__name__} was given"
+ )
+
+ if batch_rows is not None and batch_rows <= 0:
+ raise SettingsError(
+ "'batch_rows' argument must be positive integer"
+ f", {batch_rows} was given"
+ )
+
  @property
  def cache(self):
  return self._cache if self._cache is not None else False
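
The validation above surfaces as a `SettingsError` as soon as the settings object is built, for example:

```py
from datachain.lib.settings import Settings, SettingsError

try:
    Settings(batch_rows=0)    # rejected: must be a positive integer
except SettingsError as exc:
    print(exc)

Settings(batch_rows=500)      # accepted; when unset, batch_rows falls back to DEFAULT_CHUNK_ROWS (2000)
```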
@@ -61,6 +76,10 @@ class Settings:
  def workers(self):
  return self._workers if self._workers is not None else False

+ @property
+ def batch_rows(self):
+ return self._chunk_rows if self._chunk_rows is not None else DEFAULT_CHUNK_ROWS
+
  def to_dict(self):
  res = {}
  if self._cache is not None:
@@ -75,6 +94,8 @@ class Settings:
  res["namespace"] = self.namespace
  if self.project is not None:
  res["project"] = self.project
+ if self._chunk_rows is not None:
+ res["batch_rows"] = self._chunk_rows
  return res

  def add(self, settings: "Settings"):
@@ -86,3 +107,5 @@ class Settings:
  self.project = settings.project or self.project
  if settings.prefetch is not None:
  self.prefetch = settings.prefetch
+ if settings._chunk_rows is not None:
+ self._chunk_rows = settings._chunk_rows
datachain/lib/udf.py CHANGED
@@ -62,19 +62,21 @@ class UDFProperties:
  return self.udf.get_batching(use_partitioning)

  @property
- def batch(self):
- return self.udf.batch
+ def batch_rows(self):
+ return self.udf.batch_rows


  @attrs.define(slots=False)
  class UDFAdapter:
  inner: "UDFBase"
  output: UDFOutputSpec
+ batch_rows: Optional[int] = None
  batch: int = 1

  def get_batching(self, use_partitioning: bool = False) -> BatchingStrategy:
  if use_partitioning:
  return Partition()
+
  if self.batch == 1:
  return NoBatching()
  if self.batch > 1:
@@ -233,10 +235,15 @@ class UDFBase(AbstractUDF):
  def signal_names(self) -> Iterable[str]:
  return self.output.to_udf_spec().keys()

- def to_udf_wrapper(self, batch: int = 1) -> UDFAdapter:
+ def to_udf_wrapper(
+ self,
+ batch_rows: Optional[int] = None,
+ batch: int = 1,
+ ) -> UDFAdapter:
  return UDFAdapter(
  self,
  self.output.to_udf_spec(),
+ batch_rows,
  batch,
  )

@@ -418,11 +425,27 @@ class Mapper(UDFBase):


  class BatchMapper(UDFBase):
- """Inherit from this class to pass to `DataChain.batch_map()`."""
+ """Inherit from this class to pass to `DataChain.batch_map()`.
+
+ .. deprecated:: 0.29.0
+ This class is deprecated and will be removed in a future version.
+ Use `Aggregator` instead, which provides the similar functionality.
+ """

  is_input_batched = True
  is_output_batched = True

+ def __init__(self):
+ import warnings
+
+ warnings.warn(
+ "BatchMapper is deprecated and will be removed in a future version. "
+ "Use Aggregator instead, which provides the similar functionality.",
+ DeprecationWarning,
+ stacklevel=2,
+ )
+ super().__init__()
+
  def run(
  self,
  udf_fields: Sequence[str],
datachain/lib/utils.py CHANGED
@@ -1,6 +1,8 @@
  import re
  from abc import ABC, abstractmethod
  from collections.abc import Sequence
+ from pathlib import PurePosixPath
+ from urllib.parse import urlparse


  class AbstractUDF(ABC):
@@ -57,3 +59,97 @@ def normalize_col_names(col_names: Sequence[str]) -> dict[str, str]:
  new_col_names[generated_column] = org_column

  return new_col_names
+
+
+ def rebase_path(
+ src_path: str,
+ old_base: str,
+ new_base: str,
+ suffix: str = "",
+ extension: str = "",
+ ) -> str:
+ """
+ Rebase a file path from one base directory to another.
+
+ Args:
+ src_path: Source file path (can include URI scheme like s3://)
+ old_base: Base directory to remove from src_path
+ new_base: New base directory to prepend
+ suffix: Optional suffix to add before file extension
+ extension: Optional new file extension (without dot)
+
+ Returns:
+ str: Rebased path with new base directory
+
+ Raises:
+ ValueError: If old_base is not found in src_path
+ """
+ # Parse URIs to handle schemes properly
+ src_parsed = urlparse(src_path)
+ old_base_parsed = urlparse(old_base)
+ new_base_parsed = urlparse(new_base)
+
+ # Get the path component (without scheme)
+ if src_parsed.scheme:
+ src_path_only = src_parsed.netloc + src_parsed.path
+ else:
+ src_path_only = src_path
+
+ if old_base_parsed.scheme:
+ old_base_only = old_base_parsed.netloc + old_base_parsed.path
+ else:
+ old_base_only = old_base
+
+ # Normalize paths
+ src_path_norm = PurePosixPath(src_path_only).as_posix()
+ old_base_norm = PurePosixPath(old_base_only).as_posix()
+
+ # Find where old_base appears in src_path
+ if old_base_norm in src_path_norm:
+ # Find the index where old_base appears
+ idx = src_path_norm.find(old_base_norm)
+ if idx == -1:
+ raise ValueError(f"old_base '{old_base}' not found in src_path")
+
+ # Extract the relative path after old_base
+ relative_start = idx + len(old_base_norm)
+ # Skip leading slash if present
+ if relative_start < len(src_path_norm) and src_path_norm[relative_start] == "/":
+ relative_start += 1
+ relative_path = src_path_norm[relative_start:]
+ else:
+ raise ValueError(f"old_base '{old_base}' not found in src_path")
+
+ # Parse the filename
+ path_obj = PurePosixPath(relative_path)
+ stem = path_obj.stem
+ current_ext = path_obj.suffix
+
+ # Apply suffix and extension changes
+ new_stem = stem + suffix if suffix else stem
+ if extension:
+ new_ext = f".{extension}"
+ elif current_ext:
+ new_ext = current_ext
+ else:
+ new_ext = ""
+
+ # Build new filename
+ new_name = new_stem + new_ext
+
+ # Reconstruct path with new base
+ parent = str(path_obj.parent)
+ if parent == ".":
+ new_relative_path = new_name
+ else:
+ new_relative_path = str(PurePosixPath(parent) / new_name)
+
+ # Handle new_base URI scheme
+ if new_base_parsed.scheme:
+ # Has schema like s3://
+ base_path = new_base_parsed.netloc + new_base_parsed.path
+ base_path = PurePosixPath(base_path).as_posix()
+ full_path = str(PurePosixPath(base_path) / new_relative_path)
+ return f"{new_base_parsed.scheme}://{full_path}"
+ # Regular path
+ return str(PurePosixPath(new_base) / new_relative_path)
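
The helper can also be exercised on its own; the calls below reproduce the docstring examples from `File.rebase` above:

```py
from datachain.lib.utils import rebase_path

rebase_path(
    "s3://bucket/data/2025-05-27/file.wav",
    "s3://bucket/data",
    "s3://output-bucket/processed",
    extension="mp3",
)
# -> 's3://output-bucket/processed/2025-05-27/file.mp3'

rebase_path("data/audio/file.wav", "data/audio", "/local/output", suffix="_ch1", extension="npy")
# -> '/local/output/file_ch1.npy'
```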
@@ -333,32 +333,24 @@ def process_udf_outputs(
  udf_table: "Table",
  udf_results: Iterator[Iterable["UDFResult"]],
  udf: "UDFAdapter",
- batch_size: int = INSERT_BATCH_SIZE,
  cb: Callback = DEFAULT_CALLBACK,
  ) -> None:
- import psutil
-
- rows: list[UDFResult] = []
  # Optimization: Compute row types once, rather than for every row.
  udf_col_types = get_col_types(warehouse, udf.output)
+ batch_rows = udf.batch_rows or INSERT_BATCH_SIZE

- for udf_output in udf_results:
- if not udf_output:
- continue
- with safe_closing(udf_output):
- for row in udf_output:
- cb.relative_update()
- rows.append(adjust_outputs(warehouse, row, udf_col_types))
- if len(rows) >= batch_size or (
- len(rows) % 10 == 0 and psutil.virtual_memory().percent > 80
- ):
- for row_chunk in batched(rows, batch_size):
- warehouse.insert_rows(udf_table, row_chunk)
- rows.clear()
+ def _insert_rows():
+ for udf_output in udf_results:
+ if not udf_output:
+ continue
+
+ with safe_closing(udf_output):
+ for row in udf_output:
+ cb.relative_update()
+ yield adjust_outputs(warehouse, row, udf_col_types)

- if rows:
- for row_chunk in batched(rows, batch_size):
- warehouse.insert_rows(udf_table, row_chunk)
+ for row_chunk in batched(_insert_rows(), batch_rows):
+ warehouse.insert_rows(udf_table, row_chunk)

  warehouse.insert_rows_done(udf_table)
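
The rewritten control flow reduces to: flatten nested UDF outputs through a generator, then insert fixed-size chunks. A standalone sketch of that pattern in plain Python (no datachain APIs involved):

```py
from collections.abc import Iterable, Iterator
from itertools import islice

def batched(rows: Iterable[dict], n: int) -> Iterator[tuple[dict, ...]]:
    it = iter(rows)
    while chunk := tuple(islice(it, n)):
        yield chunk

def flatten(results: Iterable[Iterable[dict]]) -> Iterator[dict]:
    for output in results:     # empty outputs simply contribute nothing
        yield from output

udf_results = [[{"x": i} for i in range(5)], [], [{"x": 99}]]
for chunk in batched(flatten(udf_results), 4):
    print(f"insert {len(chunk)} rows")   # insert 4 rows, then insert 2 rows
```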

@@ -401,6 +393,7 @@ class UDFStep(Step, ABC):
  min_task_size: Optional[int] = None
  is_generator = False
  cache: bool = False
+ batch_rows: Optional[int] = None

  @abstractmethod
  def create_udf_table(self, query: Select) -> "Table":
@@ -602,6 +595,7 @@ class UDFStep(Step, ABC):
  parallel=self.parallel,
  workers=self.workers,
  min_task_size=self.min_task_size,
+ batch_rows=self.batch_rows,
  )
  return self.__class__(self.udf, self.catalog)

@@ -1633,6 +1627,7 @@ class DatasetQuery:
  min_task_size: Optional[int] = None,
  partition_by: Optional[PartitionByType] = None,
  cache: bool = False,
+ batch_rows: Optional[int] = None,
  ) -> "Self":
  """
  Adds one or more signals based on the results from the provided UDF.
@@ -1658,6 +1653,7 @@ class DatasetQuery:
  workers=workers,
  min_task_size=min_task_size,
  cache=cache,
+ batch_rows=batch_rows,
  )
  )
  return query
@@ -1679,6 +1675,7 @@ class DatasetQuery:
  namespace: Optional[str] = None,
  project: Optional[str] = None,
  cache: bool = False,
+ batch_rows: Optional[int] = None,
  ) -> "Self":
  query = self.clone()
  steps = query.steps
@@ -1691,6 +1688,7 @@ class DatasetQuery:
  workers=workers,
  min_task_size=min_task_size,
  cache=cache,
+ batch_rows=batch_rows,
  )
  )
  return query
datachain/utils.py CHANGED
@@ -11,7 +11,6 @@ import time
  from collections.abc import Iterable, Iterator, Sequence
  from contextlib import contextmanager
  from datetime import date, datetime, timezone
- from itertools import chain, islice
  from typing import TYPE_CHECKING, Any, Optional, TypeVar, Union
  from uuid import UUID

@@ -26,6 +25,8 @@ if TYPE_CHECKING:
  from typing_extensions import Self


+ DEFAULT_CHUNK_ROWS = 2000
+
  logger = logging.getLogger("datachain")

  NUL = b"\0"
@@ -225,30 +226,44 @@ def get_envs_by_prefix(prefix: str) -> dict[str, str]:
  _T_co = TypeVar("_T_co", covariant=True)


- def batched(iterable: Iterable[_T_co], n: int) -> Iterator[tuple[_T_co, ...]]:
- """Batch data into tuples of length n. The last batch may be shorter."""
- # Based on: https://docs.python.org/3/library/itertools.html#itertools-recipes
- # batched('ABCDEFG', 3) --> ABC DEF G
- if n < 1:
- raise ValueError("Batch size must be at least one")
- it = iter(iterable)
- while batch := tuple(islice(it, n)):
+ def _dynamic_batched_core(
+ iterable: Iterable[_T_co],
+ batch_rows: int,
+ ) -> Iterator[list[_T_co]]:
+ """Core batching logic that yields lists."""
+
+ batch: list[_T_co] = []
+
+ for item in iterable:
+ # Check if adding this item would exceed limits
+ if len(batch) >= batch_rows and batch: # Yield current batch if we have one
+ yield batch
+ batch = []
+
+ batch.append(item)
+
+ # Yield any remaining items
+ if batch:
  yield batch


- def batched_it(iterable: Iterable[_T_co], n: int) -> Iterator[Iterator[_T_co]]:
- """Batch data into iterators of length n. The last batch may be shorter."""
- # batched('ABCDEFG', 3) --> ABC DEF G
- if n < 1:
- raise ValueError("Batch size must be at least one")
- it = iter(iterable)
- while True:
- chunk_it = islice(it, n)
- try:
- first_el = next(chunk_it)
- except StopIteration:
- return
- yield chain((first_el,), chunk_it)
+ def batched(iterable: Iterable[_T_co], batch_rows: int) -> Iterator[tuple[_T_co, ...]]:
+ """
+ Batch data into tuples of length batch_rows .
+ The last batch may be shorter.
+ """
+ yield from (tuple(batch) for batch in _dynamic_batched_core(iterable, batch_rows))
+
+
+ def batched_it(
+ iterable: Iterable[_T_co],
+ batch_rows: int = DEFAULT_CHUNK_ROWS,
+ ) -> Iterator[Iterator[_T_co]]:
+ """
+ Batch data into iterators with dynamic sizing
+ based on row count and memory usage.
+ """
+ yield from (iter(batch) for batch in _dynamic_batched_core(iterable, batch_rows))


  def flatten(items):
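
Behaviour of the rewritten helpers, matching the recipe comment they previously carried:

```py
from datachain.utils import batched, batched_it

list(batched("ABCDEFG", 3))
# -> [('A', 'B', 'C'), ('D', 'E', 'F'), ('G',)]

# batched_it yields iterators over the same chunks; batch_rows now defaults
# to DEFAULT_CHUNK_ROWS (2000) when omitted.
["".join(chunk) for chunk in batched_it("ABCDEFG", 3)]
# -> ['ABC', 'DEF', 'G']
```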
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: datachain
- Version: 0.28.0
+ Version: 0.28.2
  Summary: Wrangle unstructured AI data at scale
  Author-email: Dmitry Petrov <support@dvc.org>
  License-Expression: Apache-2.0
@@ -19,7 +19,7 @@ datachain/script_meta.py,sha256=V-LaFOZG84pD0Zc0NvejYdzwDgzITv6yHvAHggDCnuY,4978
  datachain/semver.py,sha256=UB8GHPBtAP3UJGeiuJoInD7SK-DnB93_Xd1qy_CQ9cU,2074
  datachain/studio.py,sha256=-BmKLVNBLPFveUgVVE2So3aaiGndO2jK2qbHZ0zBDd8,15239
  datachain/telemetry.py,sha256=0A4IOPPp9VlP5pyW9eBfaTK3YhHGzHl7dQudQjUAx9A,994
- datachain/utils.py,sha256=DNqOi-Ydb7InyWvD9m7_yailxz6-YGpZzh00biQaHNo,15305
+ datachain/utils.py,sha256=Gp5JVr_m7nVWQGDOjrGnZjRXF9-Ai-MBxiPJIcpPvWQ,15451
  datachain/catalog/__init__.py,sha256=cMZzSz3VoUi-6qXSVaHYN-agxQuAcz2XSqnEPZ55crE,353
  datachain/catalog/catalog.py,sha256=QTWCXy75iWo-0MCXyfV_WbsKeZ1fpLpvL8d60rxn1ws,65528
  datachain/catalog/datasource.py,sha256=IkGMh0Ttg6Q-9DWfU_H05WUnZepbGa28HYleECi6K7I,1353
@@ -75,7 +75,7 @@ datachain/lib/audio.py,sha256=fQmIBq-9hrUZtkgeJdPHYA_D8Wfe9D4cQZk4_ijxpNc,7580
  datachain/lib/clip.py,sha256=lm5CzVi4Cj1jVLEKvERKArb-egb9j1Ls-fwTItT6vlI,6150
  datachain/lib/data_model.py,sha256=Rjah76GHwIV6AZQk4rsdg6JLre5D8Kb9T4PS5SXzsPA,3740
  datachain/lib/dataset_info.py,sha256=7w-DoKOyIVoOtWGCgciMLcP5CiAWJB3rVI-vUDF80k0,3311
- datachain/lib/file.py,sha256=_ch7xYcpl0kzImgEwccbQ-a5qb9rbEvx1vcuWerOn9k,42608
+ datachain/lib/file.py,sha256=IGwpCwjsSOpZXlRsatcMKToMmuvYiX6_UtaTjUKAAdg,44511
  datachain/lib/hf.py,sha256=3xdvPQPilnJiGv3H4S4bTGqvrGGlZgZmqjE1n_SMJZg,7293
  datachain/lib/image.py,sha256=erWvZW5M3emnbl6_fGAOPyKm-1EKbt3vOdWPfe3Oo7U,3265
  datachain/lib/listing.py,sha256=U-2stsTEwEsq4Y80dqGfktGzkmB5-ZntnL1_rzXlH0k,7089
@@ -85,13 +85,13 @@ datachain/lib/model_store.py,sha256=dkL2rcT5ag-kbgkhQPL_byEs-TCYr29qvdltroL5NxM,
  datachain/lib/namespaces.py,sha256=it52UbbwB8dzhesO2pMs_nThXiPQ1Ph9sD9I3GQkg5s,2099
  datachain/lib/projects.py,sha256=8lN0qV8czX1LGtWURCUvRlSJk-RpO9w9Rra_pOZus6g,2595
  datachain/lib/pytorch.py,sha256=S-st2SAczYut13KMf6eSqP_OQ8otWI5TRmzhK5fN3k0,7828
- datachain/lib/settings.py,sha256=9wi0FoHxRxNiyn99pR28IYsMkoo47jQxeXuObQr2Ar0,2929
+ datachain/lib/settings.py,sha256=n0YYhCVdgCdMkCSLY7kscJF9mUhlQ0a4ENWBsJFynkw,3809
  datachain/lib/signal_schema.py,sha256=JMsL8c4iCRH9PoRumvjimsOLQQslTjm_aDR2jh1zT2Q,38558
  datachain/lib/tar.py,sha256=MLcVjzIgBqRuJacCNpZ6kwSZNq1i2tLyROc8PVprHsA,999
  datachain/lib/text.py,sha256=UNHm8fhidk7wdrWqacEWaA6I9ykfYqarQ2URby7jc7M,1261
- datachain/lib/udf.py,sha256=SUnJWRDC3TlLhvpi8iqqJbeZGn5DChot7DyH-0Q-z20,17305
+ datachain/lib/udf.py,sha256=IB1IKF5KyA-NiyfhVzmBPpF_aITPS3zSlrt24f_Ofjo,17956
  datachain/lib/udf_signature.py,sha256=Yz20iJ-WF1pijT3hvcDIKFzgWV9gFxZM73KZRx3NbPk,7560
- datachain/lib/utils.py,sha256=rG2y7NwTqZOuomZZRmrA-Q-ANM_j1cToQYqDJoOeGyU,1480
+ datachain/lib/utils.py,sha256=RLji1gHnfDXtJCnBo8BcNu1obndFpVsXJ_1Vb-FQ9Qo,4554
  datachain/lib/video.py,sha256=ddVstiMkfxyBPDsnjCKY0d_93bw-DcMqGqN60yzsZoo,6851
  datachain/lib/webdataset.py,sha256=CkW8FfGigNx6wo2EEK4KMjhEE8FamRHWGs2HZuH7jDY,7214
  datachain/lib/webdataset_laion.py,sha256=xvT6m_r5y0KbOx14BUe7UC5mOgrktJq53Mh-H0EVlUE,2525
@@ -104,14 +104,14 @@ datachain/lib/convert/values_to_tuples.py,sha256=j5yZMrVUH6W7b-7yUvdCTGI7JCUAYUO
  datachain/lib/dc/__init__.py,sha256=TFci5HTvYGjBesNUxDAnXaX36PnzPEUSn5a6JxB9o0U,872
  datachain/lib/dc/csv.py,sha256=q6a9BpapGwP6nwy6c5cklxQumep2fUp9l2LAjtTJr6s,4411
  datachain/lib/dc/database.py,sha256=g5M6NjYR1T0vKte-abV-3Ejnm-HqxTIMir5cRi_SziE,6051
- datachain/lib/dc/datachain.py,sha256=mLE5v4KhzEQm7HVWBTxY6EwJ2J-YeFVcLUY4I21216c,93212
+ datachain/lib/dc/datachain.py,sha256=T5-b2LLCF0zYhXQjOgtzzr6cm5NfrKVGxcJTWn7tfNU,94164
  datachain/lib/dc/datasets.py,sha256=P6CIJizD2IYFwOQG5D3VbQRjDmUiRH0ysdtb551Xdm8,15098
  datachain/lib/dc/hf.py,sha256=AP_MUHg6HJWae10PN9hD_beQVjrl0cleZ6Cvhtl1yoI,2901
  datachain/lib/dc/json.py,sha256=dNijfJ-H92vU3soyR7X1IiDrWhm6yZIGG3bSnZkPdAE,2733
  datachain/lib/dc/listings.py,sha256=V379Cb-7ZyquM0w7sWArQZkzInZy4GB7QQ1ZfowKzQY,4544
  datachain/lib/dc/pandas.py,sha256=ObueUXDUFKJGu380GmazdG02ARpKAHPhSaymfmOH13E,1489
  datachain/lib/dc/parquet.py,sha256=zYcSgrWwyEDW9UxGUSVdIVsCu15IGEf0xL8KfWQqK94,1782
- datachain/lib/dc/records.py,sha256=FpPbApWopUri1gIaSMsfXN4fevja4mjmfb6Q5eiaGxI,3116
+ datachain/lib/dc/records.py,sha256=4N1Fq-j5r4GK-PR5jIO-9B2u_zTNX9l-6SmcRhQDAsw,3136
  datachain/lib/dc/storage.py,sha256=FXroEdxOZfbuEBIWfWTkbGwrI0D4_mrLZSRsIQm0WFE,7693
  datachain/lib/dc/utils.py,sha256=VawOAlJSvAtZbsMg33s5tJe21TRx1Km3QggI1nN6tnw,3984
  datachain/lib/dc/values.py,sha256=7l1n352xWrEdql2NhBcZ3hj8xyPglWiY4qHjFPjn6iw,1428
@@ -126,7 +126,7 @@ datachain/model/ultralytics/pose.py,sha256=pBlmt63Qe68FKmexHimUGlNbNOoOlMHXG4fzX
  datachain/model/ultralytics/segment.py,sha256=63bDCj43E6iZ0hFI5J6uQfksdCmjEp6sEm1XzVaE8pw,2986
  datachain/query/__init__.py,sha256=7DhEIjAA8uZJfejruAVMZVcGFmvUpffuZJwgRqNwe-c,263
  datachain/query/batch.py,sha256=-goxLpE0EUvaDHu66rstj53UnfHpYfBUGux8GSpJ93k,4306
- datachain/query/dataset.py,sha256=cYNrg1QyrZpO-oup3mqmSYHUvgEYBKe8RgkVbyQa6p0,62777
+ datachain/query/dataset.py,sha256=OJZ_YwpS5i4B0wVmosMmMNW1qABr6zyOmqNHQdAWir4,62704
  datachain/query/dispatch.py,sha256=A0nPxn6mEN5d9dDo6S8m16Ji_9IvJLXrgF2kqXdi4fs,15546
  datachain/query/metrics.py,sha256=DOK5HdNVaRugYPjl8qnBONvTkwjMloLqAr7Mi3TjCO0,858
  datachain/query/params.py,sha256=O_j89mjYRLOwWNhYZl-z7mi-rkdP7WyFmaDufsdTryE,863
@@ -158,9 +158,9 @@ datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR
  datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
  datachain/toolkit/split.py,sha256=ktGWzY4kyzjWyR86dhvzw-Zhl0lVk_LOX3NciTac6qo,2914
  datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
- datachain-0.28.0.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
- datachain-0.28.0.dist-info/METADATA,sha256=lA3lv9RX2NeQPobrEjoEbAwg5K3zmnAnbDJ_hjR8KLw,13766
- datachain-0.28.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- datachain-0.28.0.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
- datachain-0.28.0.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
- datachain-0.28.0.dist-info/RECORD,,
+ datachain-0.28.2.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
+ datachain-0.28.2.dist-info/METADATA,sha256=dYo2qW8RMNNCyy6KOXztfXOIldyS4_mADxeAlCI9cKw,13766
+ datachain-0.28.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ datachain-0.28.2.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
+ datachain-0.28.2.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
+ datachain-0.28.2.dist-info/RECORD,,