PyPI - datachain - Versions diffs - 0.25.2__py3-none-any.whl → 0.26.1__py3-none-any.whl - Mend

datachain 0.25.2__py3-none-any.whl → 0.26.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of datachain might be problematic. Click here for more details.

Files changed (19)

datachain/__init__.py +6 -0
datachain/catalog/loader.py +4 -0
datachain/func/__init__.py +2 -1
datachain/func/conditional.py +34 -0
datachain/lib/audio.py +151 -0
datachain/lib/convert/sql_to_python.py +8 -0
datachain/lib/dc/datachain.py +227 -67
datachain/lib/file.py +190 -1
datachain/lib/model_store.py +8 -0
datachain/lib/pytorch.py +4 -1
datachain/lib/signal_schema.py +56 -11
datachain/lib/udf.py +17 -5
datachain/query/dataset.py +37 -9
{datachain-0.25.2.dist-info → datachain-0.26.1.dist-info}/METADATA +6 -2
{datachain-0.25.2.dist-info → datachain-0.26.1.dist-info}/RECORD +19 -18
{datachain-0.25.2.dist-info → datachain-0.26.1.dist-info}/WHEEL +0 -0
{datachain-0.25.2.dist-info → datachain-0.26.1.dist-info}/entry_points.txt +0 -0
{datachain-0.25.2.dist-info → datachain-0.26.1.dist-info}/licenses/LICENSE +0 -0
{datachain-0.25.2.dist-info → datachain-0.26.1.dist-info}/top_level.txt +0 -0

datachain/lib/file.py CHANGED Viewed

@@ -43,7 +43,7 @@ logger = logging.getLogger("datachain")
 # how to create file path when exporting
 ExportPlacement = Literal["filename", "etag", "fullpath", "checksum"]
-FileType = Literal["binary", "text", "image", "video"]
+FileType = Literal["binary", "text", "image", "video", "audio"]
 EXPORT_FILES_MAX_THREADS = 5
@@ -312,6 +312,14 @@ class File(DataModel):
         file._set_stream(self._catalog, caching_enabled=self._caching_enabled)
         return file
+    def as_audio_file(self) -> "AudioFile":
+        """Convert the file to a `AudioFile` object."""
+        if isinstance(self, AudioFile):
+            return self
+        file = AudioFile(**self.model_dump())
+        file._set_stream(self._catalog, caching_enabled=self._caching_enabled)
+        return file
     @classmethod
     def upload(
         cls, data: bytes, path: str, catalog: Optional["Catalog"] = None
@@ -851,6 +859,157 @@ class VideoFile(File):
             start += duration
+class AudioFile(File):
+    """
+    A data model for handling audio files.
+    This model inherits from the `File` model and provides additional functionality
+    for reading audio files, extracting audio fragments, and splitting audio into
+    fragments.
+    """
+    def get_info(self) -> "Audio":
+        """
+        Retrieves metadata and information about the audio file. Where possible
+        it reads only the file header instead of downloading the whole file, so
+        it might be a good idea to disable caching and prefetching for the UDF if
+        you only need audio metadata.
+        Returns:
+            Audio: A Model containing audio metadata such as duration,
+                   sample rate, channels, and codec details.
+        """
+        from .audio import audio_info
+        return audio_info(self)
+    def get_fragment(self, start: float, end: float) -> "AudioFragment":
+        """
+        Returns an audio fragment from the specified time range. It neither
+        downloads the file nor actually extracts the fragment. It returns
+        a Model representing the audio fragment, which can be used to read or save
+        it later.
+        Args:
+            start (float): The start time of the fragment in seconds.
+            end (float): The end time of the fragment in seconds.
+        Returns:
+            AudioFragment: A Model representing the audio fragment.
+        """
+        if start < 0 or end < 0 or start >= end:
+            raise ValueError(f"Invalid time range: ({start:.3f}, {end:.3f})")
+        return AudioFragment(audio=self, start=start, end=end)
+    def get_fragments(
+        self,
+        duration: float,
+        start: float = 0,
+        end: Optional[float] = None,
+    ) -> "Iterator[AudioFragment]":
+        """
+        Splits the audio into multiple fragments of a specified duration.
+        Args:
+            duration (float): The duration of each audio fragment in seconds.
+            start (float): The starting time in seconds (default: 0).
+            end (float, optional): The ending time in seconds. If None, the entire
+                                   remaining audio is processed (default: None).
+        Returns:
+            Iterator[AudioFragment]: An iterator yielding audio fragments.
+        Note:
+            If end is not specified, it defaults to the audio duration reported
+            by `get_info()`, which means the audio file has to be accessed.
+        """
+        if duration <= 0:
+            raise ValueError("duration must be a positive float")
+        if start < 0:
+            raise ValueError("start must be a non-negative float")
+        if end is None:
+            end = self.get_info().duration
+        if end < 0:
+            raise ValueError("end must be a non-negative float")
+        if start >= end:
+            raise ValueError("start must be less than end")
+        while start < end:
+            yield self.get_fragment(start, min(start + duration, end))
+            start += duration
+class AudioFragment(DataModel):
+    """
+    A data model for representing an audio fragment.
+    This model represents a specific fragment within an audio file with defined
+    start and end times. It allows access to individual fragments and provides
+    functionality for reading and saving audio fragments as separate audio files.
+    Attributes:
+        audio (AudioFile): The audio file containing the audio fragment.
+        start (float): The starting time of the audio fragment in seconds.
+        end (float): The ending time of the audio fragment in seconds.
+    """
+    audio: AudioFile
+    start: float
+    end: float
+    def get_np(self) -> tuple["ndarray", int]:
+        """
+        Returns the audio fragment as a NumPy array with sample rate.
+        Returns:
+            tuple[ndarray, int]: A tuple containing the audio data as a NumPy array
+                               and the sample rate.
+        """
+        from .audio import audio_fragment_np
+        duration = self.end - self.start
+        return audio_fragment_np(self.audio, self.start, duration)
+    def read_bytes(self, format: str = "wav") -> bytes:
+        """
+        Returns the audio fragment as audio bytes.
+        Args:
+            format (str): The desired audio format (e.g., 'wav', 'mp3').
+                         Defaults to 'wav'.
+        Returns:
+            bytes: The encoded audio fragment as bytes.
+        """
+        from .audio import audio_fragment_bytes
+        duration = self.end - self.start
+        return audio_fragment_bytes(self.audio, self.start, duration, format)
+    def save(self, output: str, format: Optional[str] = None) -> "AudioFile":
+        """
+        Saves the audio fragment as a new audio file.
+        If `output` is a remote path, the audio file will be uploaded to remote storage.
+        Args:
+            output (str): The destination path, which can be a local file path
+                          or a remote URL.
+            format (str, optional): The output audio format (e.g., 'wav', 'mp3').
+                                    If None, the format is inferred from the
+                                    file extension.
+        Returns:
+            AudioFile: A Model representing the saved audio file.
+        """
+        from .audio import save_audio_fragment
+        return save_audio_fragment(self.audio, self.start, self.end, output, format)
 class VideoFrame(DataModel):
     """
     A data model for representing a video frame.
@@ -981,6 +1140,34 @@ class Video(DataModel):
     codec: str = Field(default="")
+class Audio(DataModel):
+    """
+    A data model representing metadata for an audio file.
+    Attributes:
+        sample_rate (int): The sample rate of the audio (samples per second).
+                          Defaults to -1 if unknown.
+        channels (int): The number of audio channels. Defaults to -1 if unknown.
+        duration (float): The total duration of the audio in seconds.
+                         Defaults to -1.0 if unknown.
+        samples (int): The total number of samples in the audio.
+                      Defaults to -1 if unknown.
+        format (str): The format of the audio file (e.g., 'wav', 'mp3').
+                     Defaults to an empty string.
+        codec (str): The codec used for encoding the audio. Defaults to an empty string.
+        bit_rate (int): The bit rate of the audio in bits per second.
+                       Defaults to -1 if unknown.
+    """
+    sample_rate: int = Field(default=-1)
+    channels: int = Field(default=-1)
+    duration: float = Field(default=-1.0)
+    samples: int = Field(default=-1)
+    format: str = Field(default="")
+    codec: str = Field(default="")
+    bit_rate: int = Field(default=-1)
 class ArrowRow(DataModel):
     """`DataModel` for reading row from Arrow-supported file."""
@@ -1018,5 +1205,7 @@ def get_file_type(type_: FileType = "binary") -> type[File]:
         file = ImageFile  # type: ignore[assignment]
     elif type_ == "video":
         file = VideoFile
+    elif type_ == "audio":
+        file = AudioFile
     return file

datachain/lib/model_store.py CHANGED Viewed

@@ -81,3 +81,11 @@ class ModelStore:
         if val is None or not ModelStore.is_pydantic(val):
             return None
         return val
+    @staticmethod
+    def is_partial(parent_type) -> bool:
+        return (
+            parent_type
+            and ModelStore.is_pydantic(parent_type)
+            and "@" in ModelStore.get_name(parent_type)
+        )

datachain/lib/pytorch.py CHANGED Viewed

@@ -125,7 +125,10 @@ class PytorchDataset(IterableDataset):
         ds = read_dataset(
             name=self.name, version=self.version, session=session
         ).settings(cache=self.cache, prefetch=self.prefetch)
-        ds = ds.remove_file_signals()
+        # remove file signals from dataset
+        schema = ds.signals_schema.clone_without_file_signals()
+        ds = ds.select(*schema.values.keys())
         if self.num_samples > 0:
             ds = ds.sample(self.num_samples)

datachain/lib/signal_schema.py CHANGED Viewed

@@ -446,14 +446,14 @@ class SignalSchema:
                 res[db_name] = python_to_sql(type_)
         return res
-    def row_to_objs(self, row: Sequence[Any]) -> list[DataValue]:
+    def row_to_objs(self, row: Sequence[Any]) -> list[Any]:
         self._init_setup_values()
-        objs: list[DataValue] = []
+        objs: list[Any] = []
         pos = 0
         for name, fr_type in self.values.items():
-            if self.setup_values and (val := self.setup_values.get(name, None)):
-                objs.append(val)
+            if self.setup_values and name in self.setup_values:
+                objs.append(self.setup_values.get(name))
             elif (fr := ModelStore.to_pydantic(fr_type)) is not None:
                 j, pos = unflatten_to_json_pos(fr, row, pos)
                 objs.append(fr(**j))
@@ -589,6 +589,9 @@ class SignalSchema:
         ]
         if name:
+            if "." in name:
+                name = name.replace(".", "__")
             signals = [
                 s
                 for s in signals
@@ -607,24 +610,38 @@ class SignalSchema:
         return SignalSchema(schema)
     def _find_in_tree(self, path: list[str]) -> DataType:
+        if val := self.tree.get(".".join(path)):
+            # If the full dotted path is itself a key in the tree, return its
+            # type directly without traversing the tree level by level.
+            return val[0]
         curr_tree = self.tree
         curr_type = None
         i = 0
         while curr_tree is not None and i < len(path):
             if val := curr_tree.get(path[i]):
                 curr_type, curr_tree = val
-            elif i == 0 and len(path) > 1 and (val := curr_tree.get(".".join(path))):
-                curr_type, curr_tree = val
-                break
             else:
                 curr_type = None
+                break
             i += 1
-        if curr_type is None:
+        if curr_type is None or i < len(path):
+            # If we reached the end of the path and didn't find a type,
+            # or if we didn't traverse the entire path, raise an error.
             raise SignalResolvingError(path, "is not found")
         return curr_type
+    def group_by(
+        self, partition_by: Sequence[str], new_column: Sequence[Column]
+    ) -> "SignalSchema":
+        orig_schema = SignalSchema(copy.deepcopy(self.values))
+        schema = orig_schema.to_partial(*partition_by)
+        vals = {c.name: sql_to_python(c) for c in new_column}
+        return SignalSchema(schema.values | vals)
     def select_except_signals(self, *args: str) -> "SignalSchema":
         def has_signal(signal: str):
             signal = signal.replace(".", DEFAULT_DELIMITER)
@@ -888,7 +905,7 @@ class SignalSchema:
         return res
-    def to_partial(self, *columns: str) -> "SignalSchema":
+    def to_partial(self, *columns: str) -> "SignalSchema":  # noqa: C901
         """
         Convert the schema to a partial schema with only the specified columns.
@@ -931,9 +948,15 @@ class SignalSchema:
         partial_versions: dict[str, int] = {}
         def _type_name_to_partial(signal_name: str, type_name: str) -> str:
-            if "@" not in type_name:
+            # Check if we need to create a partial for this type
+            # Only create partials for custom types that are in the custom_types dict
+            if type_name not in custom_types:
                 return type_name
-            model_name, _ = ModelStore.parse_name_version(type_name)
+            if "@" in type_name:
+                model_name, _ = ModelStore.parse_name_version(type_name)
+            else:
+                model_name = type_name
             if signal_name not in signal_partials:
                 partial_versions.setdefault(model_name, 0)
@@ -957,6 +980,14 @@ class SignalSchema:
                     parent_type_partial = _type_name_to_partial(signal, parent_type)
                     schema[signal] = parent_type_partial
+                    # If this is a complex signal without field specifier (just "file")
+                    # and it's a custom type, include the entire complex signal
+                    if len(column_parts) == 1 and parent_type in custom_types:
+                        # Include the entire complex signal - no need to create partial
+                        schema[signal] = parent_type
+                        continue
                     continue
                 if parent_type not in custom_types:
@@ -971,6 +1002,20 @@ class SignalSchema:
                         f"Field {signal} not found in custom type {parent_type}"
                     )
+                # Check if this is the last part and if the column type is a complex
+                is_last_part = i == len(column_parts) - 1
+                is_complex_signal = signal_type in custom_types
+                if is_last_part and is_complex_signal:
+                    schema[column] = signal_type
+                    # Also need to remove the partial schema entry we created for the
+                    # parent since we're promoting the nested complex column to root
+                    parent_signal = column_parts[0]
+                    schema.pop(parent_signal, None)
+                    # Don't create partial types for this case
+                    break
+                # Create partial type for this field
                 partial_type = _type_name_to_partial(
                     ".".join(column_parts[: i + 1]),
                     signal_type,

datachain/lib/udf.py CHANGED Viewed

@@ -13,8 +13,7 @@ from datachain.asyn import AsyncMapper
 from datachain.cache import temporary_cache
 from datachain.dataset import RowDict
 from datachain.lib.convert.flatten import flatten
-from datachain.lib.data_model import DataValue
-from datachain.lib.file import File
+from datachain.lib.file import DataModel, File
 from datachain.lib.utils import AbstractUDF, DataChainError, DataChainParamsError
 from datachain.query.batch import (
     Batch,
@@ -266,15 +265,28 @@ class UDFBase(AbstractUDF):
     def _parse_row(
         self, row_dict: RowDict, catalog: "Catalog", cache: bool, download_cb: Callback
-    ) -> list[DataValue]:
+    ) -> list[Any]:
         assert self.params
         row = [row_dict[p] for p in self.params.to_udf_spec()]
         obj_row = self.params.row_to_objs(row)
         for obj in obj_row:
-            if isinstance(obj, File):
-                obj._set_stream(catalog, caching_enabled=cache, download_cb=download_cb)
+            self._set_stream_recursive(obj, catalog, cache, download_cb)
         return obj_row
+    def _set_stream_recursive(
+        self, obj: Any, catalog: "Catalog", cache: bool, download_cb: Callback
+    ) -> None:
+        """Recursively set the catalog stream on all File objects within an object."""
+        if isinstance(obj, File):
+            obj._set_stream(catalog, caching_enabled=cache, download_cb=download_cb)
+        # Check all fields for nested File objects, but only for DataModel objects
+        if isinstance(obj, DataModel):
+            for field_name in obj.model_fields:
+                field_value = getattr(obj, field_name, None)
+                if isinstance(field_value, DataModel):
+                    self._set_stream_recursive(field_value, catalog, cache, download_cb)
     def _prepare_row(self, row, udf_fields, catalog, cache, download_cb):
         row_dict = RowDict(zip(udf_fields, row))
         return self._parse_row(row_dict, catalog, cache, download_cb)

datachain/query/dataset.py CHANGED Viewed

@@ -559,7 +559,13 @@ class UDFStep(Step, ABC):
         """
         Create temporary table with group by partitions.
         """
+        # Check if partition_by is set, we need it to create partitions.
         assert self.partition_by is not None
+        # Check if sys__id is in the query, we need it to be able to join
+        # the partition table with the udf table later.
+        assert any(c.name == "sys__id" for c in query.selected_columns), (
+            "Query must have sys__id column to use partitioning."
+        )
         if isinstance(self.partition_by, (list, tuple, GeneratorType)):
             list_partition_by = list(self.partition_by)
@@ -606,6 +612,22 @@ class UDFStep(Step, ABC):
         # Apply partitioning if needed.
         if self.partition_by is not None:
+            if not any(c.name == "sys__id" for c in query.selected_columns):
+                # If sys__id is not in the query, we need to create a temp table
+                # to hold the query results, so we can join it with the
+                # partition table later.
+                columns = [
+                    c if isinstance(c, Column) else Column(c.name, c.type)
+                    for c in query.subquery().columns
+                ]
+                temp_table = self.catalog.warehouse.create_dataset_rows_table(
+                    self.catalog.warehouse.temp_table_name(),
+                    columns=columns,
+                )
+                temp_tables.append(temp_table.name)
+                self.catalog.warehouse.copy_table(temp_table, query)
+                _query = query = temp_table.select()
             partition_tbl = self.create_partitions_table(query)
             temp_tables.append(partition_tbl.name)
             query = query.outerjoin(
@@ -1031,16 +1053,22 @@ class SQLGroupBy(SQLClause):
             c.get_column() if isinstance(c, Function) else c for c in self.group_by
         ]
-        cols = [
-            c.get_column()
-            if isinstance(c, Function)
-            else subquery.c[str(c)]
-            if isinstance(c, (str, C))
-            else c
-            for c in (*group_by, *self.cols)
-        ]
+        cols_dict: dict[str, Any] = {}
+        for c in (*group_by, *self.cols):
+            if isinstance(c, Function):
+                key = c.name
+                value = c.get_column()
+            elif isinstance(c, (str, C)):
+                key = str(c)
+                value = subquery.c[str(c)]
+            else:
+                key = c.name
+                value = c  # type: ignore[assignment]
+            cols_dict[key] = value
+        unique_cols = cols_dict.values()
-        return sqlalchemy.select(*cols).select_from(subquery).group_by(*group_by)
+        return sqlalchemy.select(*unique_cols).select_from(subquery).group_by(*group_by)
 def _validate_columns(

{datachain-0.25.2.dist-info → datachain-0.26.1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: datachain
-Version: 0.25.2
+Version: 0.26.1
 Summary: Wrangle unstructured AI data at scale
 Author-email: Dmitry Petrov <support@dvc.org>
 License-Expression: Apache-2.0
@@ -63,6 +63,9 @@ Provides-Extra: torch
 Requires-Dist: torch>=2.1.0; extra == "torch"
 Requires-Dist: torchvision; extra == "torch"
 Requires-Dist: transformers>=4.36.0; extra == "torch"
+Provides-Extra: audio
+Requires-Dist: torchaudio; extra == "audio"
+Requires-Dist: soundfile; extra == "audio"
 Provides-Extra: remote
 Requires-Dist: lz4; extra == "remote"
 Requires-Dist: requests>=2.22.0; extra == "remote"
@@ -78,7 +81,7 @@ Requires-Dist: ffmpeg-python; extra == "video"
 Requires-Dist: imageio[ffmpeg,pyav]>=2.37.0; extra == "video"
 Requires-Dist: opencv-python; extra == "video"
 Provides-Extra: tests
-Requires-Dist: datachain[hf,remote,torch,vector,video]; extra == "tests"
+Requires-Dist: datachain[audio,hf,remote,torch,vector,video]; extra == "tests"
 Requires-Dist: pytest<9,>=8; extra == "tests"
 Requires-Dist: pytest-sugar>=0.9.6; extra == "tests"
 Requires-Dist: pytest-cov>=4.1.0; extra == "tests"
@@ -108,6 +111,7 @@ Requires-Dist: accelerate; extra == "examples"
 Requires-Dist: huggingface_hub[hf_transfer]; extra == "examples"
 Requires-Dist: ultralytics; extra == "examples"
 Requires-Dist: open_clip_torch; extra == "examples"
+Requires-Dist: openai; extra == "examples"
 Dynamic: license-file
 ================

{datachain-0.25.2.dist-info → datachain-0.26.1.dist-info}/RECORD RENAMED Viewed

@@ -1,4 +1,4 @@
-datachain/__init__.py,sha256=ofXacfzLKYzTqU1oyHz5xZi1L4skQCoJdUMC4YARenk,1616
+datachain/__init__.py,sha256=2TZ8ptSB9BtnYF31mDEhWG9N16EQ5pf9vNqQaFr2txs,1712
 datachain/__main__.py,sha256=hG3Y4ARGEqe1AWwNMd259rBlqtphx1Wk39YbueQ0yV8,91
 datachain/asyn.py,sha256=RH_jFwJcTXxhEFomaI9yL6S3Onau6NZ6FSKfKFGtrJE,9689
 datachain/cache.py,sha256=ESVRaCJXEThMIfGEFVHx6wJPOZA7FYk9V6WxjyuqUBY,3626
@@ -23,7 +23,7 @@ datachain/utils.py,sha256=DNqOi-Ydb7InyWvD9m7_yailxz6-YGpZzh00biQaHNo,15305
 datachain/catalog/__init__.py,sha256=cMZzSz3VoUi-6qXSVaHYN-agxQuAcz2XSqnEPZ55crE,353
 datachain/catalog/catalog.py,sha256=QTWCXy75iWo-0MCXyfV_WbsKeZ1fpLpvL8d60rxn1ws,65528
 datachain/catalog/datasource.py,sha256=IkGMh0Ttg6Q-9DWfU_H05WUnZepbGa28HYleECi6K7I,1353
-datachain/catalog/loader.py,sha256=UXjYD6BNRoupPvkiz3-b04jepXhtLHCA4gzKFnXxOtQ,5987
+datachain/catalog/loader.py,sha256=B2cps5coFE4MBttM-j8cs7JgNVPjnHKF4Gx1s2fJrxw,6119
 datachain/cli/__init__.py,sha256=WvBqnwjG8Wp9xGCn-4eqfoZ3n7Sj1HJemCi4MayJh_c,8221
 datachain/cli/utils.py,sha256=wrLnAh7Wx8O_ojZE8AE4Lxn5WoxHbOj7as8NWlLAA74,3036
 datachain/cli/commands/__init__.py,sha256=zp3bYIioO60x_X04A4-IpZqSYVnpwOa1AdERQaRlIhI,493
@@ -58,11 +58,11 @@ datachain/diff/__init__.py,sha256=-OFZzgOplqO84iWgGY7kfe60NXaWR9JRIh9T-uJboAM,96
 datachain/fs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datachain/fs/reference.py,sha256=A8McpXF0CqbXPqanXuvpKu50YLB3a2ZXA3YAPxtBXSM,914
 datachain/fs/utils.py,sha256=s-FkTOCGBk-b6TT3toQH51s9608pofoFjUSTc1yy7oE,825
-datachain/func/__init__.py,sha256=CjNLHfJkepdXdRZ6HjJBjNSIjOeFMuMkwPDaPUrM75g,1270
+datachain/func/__init__.py,sha256=9K2MEC1NclY_zWuqevfEUOcrSE26cXDVnGqhNTj4lF8,1288
 datachain/func/aggregate.py,sha256=fmVEKf3MUR29dEgllGdtl6nG7Lwz-SiyA5X1EyRRNUk,12456
 datachain/func/array.py,sha256=fz5NUIPkp_KZ7tadCqJQSSJwWMYXEfYn60QkG2epC3k,13627
 datachain/func/base.py,sha256=wA0sBQAVyN9LPxoo7Ox83peS0zUVnyuKxukwAcjGLfY,534
-datachain/func/conditional.py,sha256=bzIZRSpVpe-lrHoWPTCA7bzZ-AHtR44BVM82hqD1pY0,9188
+datachain/func/conditional.py,sha256=9YYurD_PBMyf5rR9dj2gLv-Lo7UhYfHW6EtrUErVpz8,10165
 datachain/func/func.py,sha256=fpslnn4edr0dH3mD8BSTndRFJiiVZvbJoBJV6HkHMqw,17400
 datachain/func/numeric.py,sha256=J6FgzuIAcS6B02Cm1qPnJdB6ut21jyBDVXSBrkZNZaQ,6978
 datachain/func/path.py,sha256=9Jas35QhEtRai4l54hMqVvuJsqxHvOx88oo4vym1H_I,4077
@@ -71,24 +71,25 @@ datachain/func/string.py,sha256=X9u4ip97U63RCaKRhMddoze7HgPiY3LbPRn9G06UWWo,7311
 datachain/func/window.py,sha256=ImyRpc1QI8QUSPO7KdD60e_DPVo7Ja0G5kcm6BlyMcw,1584
 datachain/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datachain/lib/arrow.py,sha256=hdEQ8I1JgNmEAaXTaqaU1qvZDi5dgtes1IC69ycthz8,10753
+datachain/lib/audio.py,sha256=J7XJ14ItPF9y6pN-tmMV9In9X9rgwlBwzyzdGOUkPGk,4376
 datachain/lib/clip.py,sha256=lm5CzVi4Cj1jVLEKvERKArb-egb9j1Ls-fwTItT6vlI,6150
 datachain/lib/data_model.py,sha256=ZwBXELtqROEdLL4DmxTipnwUZmhQvMz_UVDzyf7nQ9Y,2899
 datachain/lib/dataset_info.py,sha256=7w-DoKOyIVoOtWGCgciMLcP5CiAWJB3rVI-vUDF80k0,3311
-datachain/lib/file.py,sha256=gTzJXaGIyFOrw_B4yiOEs7U23n4oAQuWDI2v9KWwp2o,33889
+datachain/lib/file.py,sha256=tHBBacsh1580UPFC6fAINBNwNiyymNgzj89rpsz1LKc,40817
 datachain/lib/hf.py,sha256=_dCoGTv7n5cBgxhCDfZI-t3hnMCXGHd6sEsxRThcizE,5754
 datachain/lib/image.py,sha256=erWvZW5M3emnbl6_fGAOPyKm-1EKbt3vOdWPfe3Oo7U,3265
 datachain/lib/listing.py,sha256=U-2stsTEwEsq4Y80dqGfktGzkmB5-ZntnL1_rzXlH0k,7089
 datachain/lib/listing_info.py,sha256=9ua40Hw0aiQByUw3oAEeNzMavJYfW0Uhe8YdCTK-m_g,1110
 datachain/lib/meta_formats.py,sha256=zdyg6XLk3QIsSk3I7s0Ez5kaCJSlE3uq7JiGxf7UwtU,6348
-datachain/lib/model_store.py,sha256=DNIv8Y6Jtk1_idNLzIpsThOsdW2BMAudyUCbPUcgcxk,2515
+datachain/lib/model_store.py,sha256=dkL2rcT5ag-kbgkhQPL_byEs-TCYr29qvdltroL5NxM,2734
 datachain/lib/namespaces.py,sha256=it52UbbwB8dzhesO2pMs_nThXiPQ1Ph9sD9I3GQkg5s,2099
 datachain/lib/projects.py,sha256=8lN0qV8czX1LGtWURCUvRlSJk-RpO9w9Rra_pOZus6g,2595
-datachain/lib/pytorch.py,sha256=oBBd6cxYrcwaFz7IQajKqhGqDdNnwUZWs0wJPRizrjk,7712
+datachain/lib/pytorch.py,sha256=S-st2SAczYut13KMf6eSqP_OQ8otWI5TRmzhK5fN3k0,7828
 datachain/lib/settings.py,sha256=9wi0FoHxRxNiyn99pR28IYsMkoo47jQxeXuObQr2Ar0,2929
-datachain/lib/signal_schema.py,sha256=dVEqqrQQ_BS3yzU_49-Gari7IjVyMl1UT8h1WIsZabs,36489
+datachain/lib/signal_schema.py,sha256=tOWcWEG0ZwiU0qxywEYs3qkTexQQHmzg28wZ1CJGyEI,38552
 datachain/lib/tar.py,sha256=MLcVjzIgBqRuJacCNpZ6kwSZNq1i2tLyROc8PVprHsA,999
 datachain/lib/text.py,sha256=UNHm8fhidk7wdrWqacEWaA6I9ykfYqarQ2URby7jc7M,1261
-datachain/lib/udf.py,sha256=3uITkhO8IZnX49aePheObzd5ORYi2DIDYZVMQlBAJ-s,16687
+datachain/lib/udf.py,sha256=nkcB3HNtSteUspwsGmOKyy3mH2F-Sfo6iW64-Ep47-I,17299
 datachain/lib/udf_signature.py,sha256=Yz20iJ-WF1pijT3hvcDIKFzgWV9gFxZM73KZRx3NbPk,7560
 datachain/lib/utils.py,sha256=rG2y7NwTqZOuomZZRmrA-Q-ANM_j1cToQYqDJoOeGyU,1480
 datachain/lib/video.py,sha256=u6fLJWj5G6QqsVkpfHnKGklBNpG3BRRg6v3izngnNcU,6767
@@ -97,13 +98,13 @@ datachain/lib/webdataset_laion.py,sha256=xvT6m_r5y0KbOx14BUe7UC5mOgrktJq53Mh-H0E
 datachain/lib/convert/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datachain/lib/convert/flatten.py,sha256=IZFiUYbgXSxXhPSG5Cqf5IjnJ4ZDZKXMr4o_yCR1NY4,1505
 datachain/lib/convert/python_to_sql.py,sha256=wg-O5FRKX3x3Wh8ZL1b9ntMlgf1zRO4djMP3t8CHJLo,3188
-datachain/lib/convert/sql_to_python.py,sha256=XXCBYDQFUXJIBNWkjEP944cnCfJ8GF2Tji0DLF3A_zQ,315
+datachain/lib/convert/sql_to_python.py,sha256=Gxc4FylWC_Pvvuawuc2MKZIiuAWI7wje8pyeN1MxRrU,670
 datachain/lib/convert/unflatten.py,sha256=ysMkstwJzPMWUlnxn-Z-tXJR3wmhjHeSN_P-sDcLS6s,2010
 datachain/lib/convert/values_to_tuples.py,sha256=j5yZMrVUH6W7b-7yUvdCTGI7JCUAYUOzHUGPoyZXAB0,4360
 datachain/lib/dc/__init__.py,sha256=TFci5HTvYGjBesNUxDAnXaX36PnzPEUSn5a6JxB9o0U,872
 datachain/lib/dc/csv.py,sha256=q6a9BpapGwP6nwy6c5cklxQumep2fUp9l2LAjtTJr6s,4411
 datachain/lib/dc/database.py,sha256=g5M6NjYR1T0vKte-abV-3Ejnm-HqxTIMir5cRi_SziE,6051
-datachain/lib/dc/datachain.py,sha256=_FJnpgNN_b2xz39MsgeS0NTto0hzpcFPbJlaUBLcqTs,87094
+datachain/lib/dc/datachain.py,sha256=ap54lcuj71tvp0zX1jiFFiEWvA5UPeyYJRJkd2APmlI,92897
 datachain/lib/dc/datasets.py,sha256=P6CIJizD2IYFwOQG5D3VbQRjDmUiRH0ysdtb551Xdm8,15098
 datachain/lib/dc/hf.py,sha256=PJl2wiLjdRsMz0SYbLT-6H8b-D5i2WjeH7li8HHOk_0,2145
 datachain/lib/dc/json.py,sha256=dNijfJ-H92vU3soyR7X1IiDrWhm6yZIGG3bSnZkPdAE,2733
@@ -125,7 +126,7 @@ datachain/model/ultralytics/pose.py,sha256=pBlmt63Qe68FKmexHimUGlNbNOoOlMHXG4fzX
 datachain/model/ultralytics/segment.py,sha256=63bDCj43E6iZ0hFI5J6uQfksdCmjEp6sEm1XzVaE8pw,2986
 datachain/query/__init__.py,sha256=7DhEIjAA8uZJfejruAVMZVcGFmvUpffuZJwgRqNwe-c,263
 datachain/query/batch.py,sha256=-goxLpE0EUvaDHu66rstj53UnfHpYfBUGux8GSpJ93k,4306
-datachain/query/dataset.py,sha256=t9EWZkJGPRPcBvKOsFO7ZiaTeUXc8YuTZydRbcv83_E,61350
+datachain/query/dataset.py,sha256=cYNrg1QyrZpO-oup3mqmSYHUvgEYBKe8RgkVbyQa6p0,62777
 datachain/query/dispatch.py,sha256=A0nPxn6mEN5d9dDo6S8m16Ji_9IvJLXrgF2kqXdi4fs,15546
 datachain/query/metrics.py,sha256=DOK5HdNVaRugYPjl8qnBONvTkwjMloLqAr7Mi3TjCO0,858
 datachain/query/params.py,sha256=O_j89mjYRLOwWNhYZl-z7mi-rkdP7WyFmaDufsdTryE,863
@@ -157,9 +158,9 @@ datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR
 datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
 datachain/toolkit/split.py,sha256=ktGWzY4kyzjWyR86dhvzw-Zhl0lVk_LOX3NciTac6qo,2914
 datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
-datachain-0.25.2.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
-datachain-0.25.2.dist-info/METADATA,sha256=aA1Ee1umcPyEXMzrdlhNexDW1rq2zRo2IJHAKyOJwN4,13385
-datachain-0.25.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-datachain-0.25.2.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
-datachain-0.25.2.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
-datachain-0.25.2.dist-info/RECORD,,
+datachain-0.26.1.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
+datachain-0.26.1.dist-info/METADATA,sha256=C0Pb9d9IcJ6oOPXihcyEhTc_Rf7Fe4pP_anKhC3JfeU,13543
+datachain-0.26.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+datachain-0.26.1.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
+datachain-0.26.1.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
+datachain-0.26.1.dist-info/RECORD,,

{datachain-0.25.2.dist-info → datachain-0.26.1.dist-info}/WHEEL RENAMED Viewed

File without changes

{datachain-0.25.2.dist-info → datachain-0.26.1.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{datachain-0.25.2.dist-info → datachain-0.26.1.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{datachain-0.25.2.dist-info → datachain-0.26.1.dist-info}/top_level.txt RENAMED Viewed

File without changes

datachain 0.25.2__py3-none-any.whl → 0.26.1__py3-none-any.whl

Potentially problematic release.

datachain 0.25.2__py3-none-any.whl → 0.26.1__py3-none-any.whl