PyPI - datachain - Versions diffs - 0.3.6__py3-none-any.whl → 0.3.8__py3-none-any.whl - Mend

datachain 0.3.6__py3-none-any.whl → 0.3.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of datachain might be problematic. Click here for more details.

Files changed (24)

datachain/catalog/catalog.py +0 -81
datachain/cli.py +0 -37
datachain/data_storage/schema.py +1 -1
datachain/data_storage/sqlite.py +1 -10
datachain/data_storage/warehouse.py +12 -5
datachain/lib/arrow.py +4 -4
datachain/lib/clip.py +14 -3
datachain/lib/convert/python_to_sql.py +9 -0
datachain/lib/data_model.py +10 -1
datachain/lib/dc.py +95 -30
datachain/lib/hf.py +166 -0
datachain/lib/image.py +9 -1
datachain/lib/pytorch.py +1 -2
datachain/lib/signal_schema.py +124 -20
datachain/lib/text.py +4 -0
datachain/lib/udf.py +14 -20
datachain/query/dataset.py +10 -3
datachain/query/session.py +5 -3
{datachain-0.3.6.dist-info → datachain-0.3.8.dist-info}/METADATA +8 -3
{datachain-0.3.6.dist-info → datachain-0.3.8.dist-info}/RECORD +24 -23
{datachain-0.3.6.dist-info → datachain-0.3.8.dist-info}/WHEEL +1 -1
{datachain-0.3.6.dist-info → datachain-0.3.8.dist-info}/LICENSE +0 -0
{datachain-0.3.6.dist-info → datachain-0.3.8.dist-info}/entry_points.txt +0 -0
{datachain-0.3.6.dist-info → datachain-0.3.8.dist-info}/top_level.txt +0 -0

datachain/lib/hf.py ADDED Viewed

@@ -0,0 +1,166 @@
+try:
+    from datasets import (
+        Array2D,
+        Array3D,
+        Array4D,
+        Array5D,
+        Audio,
+        ClassLabel,
+        Dataset,
+        DatasetDict,
+        Image,
+        IterableDataset,
+        IterableDatasetDict,
+        Sequence,
+        Value,
+        load_dataset,
+    )
+    from datasets.features.features import string_to_arrow
+    from datasets.features.image import image_to_bytes
+except ImportError as exc:
+    raise ImportError(
+        "Missing dependencies for huggingface datasets:\n"
+        "To install run:\n\n"
+        "  pip install 'datachain[hf]'\n"
+    ) from exc
+from io import BytesIO
+from typing import TYPE_CHECKING, Any, Union
+import PIL
+from tqdm import tqdm
+from datachain.lib.arrow import arrow_type_mapper
+from datachain.lib.data_model import DataModel, DataType, dict_to_data_model
+from datachain.lib.udf import Generator
+if TYPE_CHECKING:
+    from pydantic import BaseModel
+HFDatasetType = Union[DatasetDict, Dataset, IterableDatasetDict, IterableDataset]
+class HFClassLabel(DataModel):
+    string: str
+    integer: int
+    def read(self):
+        return self.integer
+class HFImage(DataModel):
+    img: bytes
+    def read(self):
+        return PIL.Image.open(BytesIO(self.img))
+class HFAudio(DataModel):
+    path: str
+    array: list[float]
+    sampling_rate: int
+class HFGenerator(Generator):
+    def __init__(
+        self,
+        ds: Union[str, HFDatasetType],
+        output_schema: type["BaseModel"],
+        *args,
+        **kwargs,
+    ):
+        super().__init__()
+        self.ds = ds
+        self.output_schema = output_schema
+        self.args = args
+        self.kwargs = kwargs
+    def setup(self):
+        self.ds_dict = stream_splits(self.ds, *self.args, **self.kwargs)
+    def process(self, split: str = ""):
+        desc = "Parsed Hugging Face dataset"
+        ds = self.ds_dict[split]
+        if split:
+            desc += f" split '{split}'"
+        with tqdm(desc=desc, unit=" rows") as pbar:
+            for row in ds:
+                output_dict = {}
+                if split:
+                    output_dict["split"] = split
+                for name, feat in ds.features.items():
+                    anno = self.output_schema.model_fields[name].annotation
+                    output_dict[name] = _convert_feature(row[name], feat, anno)
+                yield self.output_schema(**output_dict)
+                pbar.update(1)
+def stream_splits(ds: Union[str, HFDatasetType], *args, **kwargs):
+    if isinstance(ds, str):
+        ds = load_dataset(ds, *args, streaming=True, **kwargs)
+    if isinstance(ds, (DatasetDict, IterableDatasetDict)):
+        return ds
+    return {"": ds}
+def _convert_feature(val: Any, feat: Any, anno: Any) -> Any:
+    if isinstance(feat, (Value, Array2D, Array3D, Array4D, Array5D)):
+        return val
+    if isinstance(feat, ClassLabel):
+        return HFClassLabel(string=feat.names[val], integer=val)
+    if isinstance(feat, Sequence):
+        if isinstance(feat.feature, dict):
+            sdict = {}
+            for sname in val:
+                sfeat = feat.feature[sname]
+                sanno = anno.model_fields[sname].annotation
+                sdict[sname] = [_convert_feature(v, sfeat, sanno) for v in val[sname]]
+            return anno(**sdict)
+        return val
+    if isinstance(feat, Image):
+        return HFImage(img=image_to_bytes(val))
+    if isinstance(feat, Audio):
+        return HFAudio(**val)
+def get_output_schema(
+    ds: Union[Dataset, IterableDataset], model_name: str = ""
+) -> dict[str, DataType]:
+    fields_dict = {}
+    for name, val in ds.features.items():
+        fields_dict[name] = _feature_to_chain_type(name, val)  # type: ignore[assignment]
+    return fields_dict  # type: ignore[return-value]
+def _feature_to_chain_type(name: str, val: Any) -> type:  # noqa: PLR0911
+    if isinstance(val, Value):
+        return arrow_type_mapper(val.pa_type)
+    if isinstance(val, ClassLabel):
+        return HFClassLabel
+    if isinstance(val, Sequence):
+        if isinstance(val.feature, dict):
+            sequence_dict = {}
+            for sname, sval in val.feature.items():
+                dtype = _feature_to_chain_type(sname, sval)
+                sequence_dict[sname] = list[dtype]  # type: ignore[valid-type]
+            return dict_to_data_model(name, sequence_dict)  # type: ignore[arg-type]
+        return list[_feature_to_chain_type(name, val.feature)]  # type: ignore[arg-type,misc,return-value]
+    if isinstance(val, Array2D):
+        dtype = arrow_type_mapper(string_to_arrow(val.dtype))
+        return list[list[dtype]]  # type: ignore[valid-type]
+    if isinstance(val, Array3D):
+        dtype = arrow_type_mapper(string_to_arrow(val.dtype))
+        return list[list[list[dtype]]]  # type: ignore[valid-type]
+    if isinstance(val, Array4D):
+        dtype = arrow_type_mapper(string_to_arrow(val.dtype))
+        return list[list[list[list[dtype]]]]  # type: ignore[valid-type]
+    if isinstance(val, Array5D):
+        dtype = arrow_type_mapper(string_to_arrow(val.dtype))
+        return list[list[list[list[list[dtype]]]]]  # type: ignore[valid-type]
+    if isinstance(val, Image):
+        return HFImage
+    if isinstance(val, Audio):
+        return HFAudio
+    raise TypeError(f"Unknown huggingface datasets type {type(val)}")

datachain/lib/image.py CHANGED Viewed

@@ -10,6 +10,7 @@ def convert_image(
     size: Optional[tuple[int, int]] = None,
     transform: Optional[Callable] = None,
     encoder: Optional[Callable] = None,
+    device: Optional[Union[str, torch.device]] = None,
 ) -> Union[Image.Image, torch.Tensor]:
     """
     Resize, transform, and otherwise convert an image.
@@ -20,6 +21,7 @@ def convert_image(
         size (tuple[int, int]): Size in (width, height) pixels for resizing.
         transform (Callable): Torchvision transform or huggingface processor to apply.
         encoder (Callable): Encode image using model.
+        device (str or torch.device): Device to use.
     """
     if mode:
         img = img.convert(mode)
@@ -35,6 +37,8 @@ def convert_image(
                 img = torch.tensor(img.pixel_values[0])  # type: ignore[assignment,attr-defined]
         except ImportError:
             pass
+        if device:
+            img = img.to(device)  # type: ignore[attr-defined]
         if encoder:
             img = img.unsqueeze(0)  # type: ignore[attr-defined]
     if encoder:
@@ -48,6 +52,7 @@ def convert_images(
     size: Optional[tuple[int, int]] = None,
     transform: Optional[Callable] = None,
     encoder: Optional[Callable] = None,
+    device: Optional[Union[str, torch.device]] = None,
 ) -> Union[list[Image.Image], torch.Tensor]:
     """
     Resize, transform, and otherwise convert one or more images.
@@ -58,11 +63,14 @@ def convert_images(
         size (tuple[int, int]): Size in (width, height) pixels for resizing.
         transform (Callable): Torchvision transform or huggingface processor to apply.
         encoder (Callable): Encode image using model.
+        device (str or torch.device): Device to use.
     """
     if isinstance(images, Image.Image):
         images = [images]
-    converted = [convert_image(img, mode, size, transform) for img in images]
+    converted = [
+        convert_image(img, mode, size, transform, device=device) for img in images
+    ]
     if isinstance(converted[0], torch.Tensor):
         converted = torch.stack(converted)  # type: ignore[assignment,arg-type]

datachain/lib/pytorch.py CHANGED Viewed

@@ -10,7 +10,6 @@ from torchvision.transforms import v2
 from datachain.catalog import Catalog, get_catalog
 from datachain.lib.dc import DataChain
-from datachain.lib.file import File
 from datachain.lib.text import convert_text
 if TYPE_CHECKING:
@@ -97,7 +96,7 @@ class PytorchDataset(IterableDataset):
         for row_features in ds.collect():
             row = []
             for fr in row_features:
-                if isinstance(fr, File):
+                if hasattr(fr, "read"):
                     row.append(fr.read())  # type: ignore[unreachable]
                 else:
                     row.append(fr)

datachain/lib/signal_schema.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import copy
+import warnings
 from collections.abc import Iterator, Sequence
 from dataclasses import dataclass
 from datetime import datetime
@@ -42,6 +43,8 @@ NAMES_TO_TYPES = {
     "dict": dict,
     "bytes": bytes,
     "datetime": datetime,
+    "Literal": Literal,
+    "Union": Union,
 }
@@ -49,6 +52,10 @@ class SignalSchemaError(DataChainParamsError):
     pass
+class SignalSchemaWarning(RuntimeWarning):
+    pass
 class SignalResolvingError(SignalSchemaError):
     def __init__(self, path: Optional[list[str]], msg: str):
         name = " '" + ".".join(path) + "'" if path else ""
@@ -69,6 +76,28 @@ class SignalResolvingTypeError(SignalResolvingError):
         )
+def create_feature_model(
+    name: str, fields: dict[str, Union[type, tuple[type, Any]]]
+) -> type[BaseModel]:
+    """
+    This gets or returns a dynamic feature model for use in restoring a model
+    from the custom_types stored within a serialized SignalSchema. This is useful
+    when using a custom feature model where the original definition is not available.
+    This happens in Studio and if a custom model is used in a dataset, then that dataset
+    is used in a DataChain in a separate script where that model is not declared.
+    """
+    name = name.replace("@", "_")
+    return create_model(
+        name,
+        __base__=DataModel,  # type: ignore[call-overload]
+        # These are tuples for each field of: annotation, default (if any)
+        **{
+            field_name: anno if isinstance(anno, tuple) else (anno, None)
+            for field_name, anno in fields.items()
+        },
+    )
 @dataclass
 class SignalSchema:
     values: dict[str, DataType]
@@ -117,40 +146,115 @@ class SignalSchema:
                 )
         return SignalSchema(signals)
-    def serialize(self) -> dict[str, str]:
-        signals = {}
+    @staticmethod
+    def _get_name_original_type(fr_type: type) -> tuple[str, type]:
+        """Returns the name of and the original type for the given type,
+        based on whether the type is Optional or not."""
+        orig = get_origin(fr_type)
+        args = get_args(fr_type)
+        # Check if fr_type is Optional
+        if orig == Union and len(args) == 2 and (type(None) in args):
+            fr_type = args[0]
+            orig = get_origin(fr_type)
+        if orig in (Literal, LiteralEx):
+            # Literal has no __name__ in Python 3.9
+            type_name = "Literal"
+        elif orig == Union:
+            # Union also has no __name__ in Python 3.9
+            type_name = "Union"
+        else:
+            type_name = str(fr_type.__name__)  # type: ignore[union-attr]
+        return type_name, fr_type
+    @staticmethod
+    def serialize_custom_model_fields(
+        name: str, fr: type, custom_types: dict[str, Any]
+    ) -> str:
+        """This serializes any custom type information to the provided custom_types
+        dict, and returns the name of the type provided."""
+        if hasattr(fr, "__origin__") or not issubclass(fr, BaseModel):
+            # Don't store non-feature types.
+            return name
+        version_name = ModelStore.get_name(fr)
+        if version_name in custom_types:
+            # This type is already stored in custom_types.
+            return version_name
+        fields = {}
+        for field_name, info in fr.model_fields.items():
+            field_type = info.annotation
+            # All fields should be typed.
+            assert field_type
+            field_type_name, field_type = SignalSchema._get_name_original_type(
+                field_type
+            )
+            # Serialize this type to custom_types if it is a custom type as well.
+            fields[field_name] = SignalSchema.serialize_custom_model_fields(
+                field_type_name, field_type, custom_types
+            )
+        custom_types[version_name] = fields
+        return version_name
+    def serialize(self) -> dict[str, Any]:
+        signals: dict[str, Any] = {}
+        custom_types: dict[str, Any] = {}
         for name, fr_type in self.values.items():
             if (fr := ModelStore.to_pydantic(fr_type)) is not None:
                 ModelStore.register(fr)
                 signals[name] = ModelStore.get_name(fr)
+                type_name, fr_type = SignalSchema._get_name_original_type(fr)
             else:
-                orig = get_origin(fr_type)
-                args = get_args(fr_type)
-                # Check if fr_type is Optional
-                if orig == Union and len(args) == 2 and (type(None) in args):
-                    fr_type = args[0]
-                signals[name] = str(fr_type.__name__)  # type: ignore[union-attr]
+                type_name, fr_type = SignalSchema._get_name_original_type(fr_type)
+                signals[name] = type_name
+            self.serialize_custom_model_fields(type_name, fr_type, custom_types)
+        if custom_types:
+            signals["_custom_types"] = custom_types
         return signals
     @staticmethod
-    def deserialize(schema: dict[str, str]) -> "SignalSchema":
+    def _resolve_type(type_name: str, custom_types: dict[str, Any]) -> Optional[type]:
+        """Convert a string-based type back into a python type."""
+        fr = NAMES_TO_TYPES.get(type_name)
+        if fr:
+            return fr  # type: ignore[return-value]
+        model_name, version = ModelStore.parse_name_version(type_name)
+        fr = ModelStore.get(model_name, version)
+        if fr:
+            return fr
+        if type_name in custom_types:
+            fields = custom_types[type_name]
+            fields = {
+                field_name: SignalSchema._resolve_type(field_type_str, custom_types)
+                for field_name, field_type_str in fields.items()
+            }
+            return create_feature_model(type_name, fields)
+        return None
+    @staticmethod
+    def deserialize(schema: dict[str, Any]) -> "SignalSchema":
         if not isinstance(schema, dict):
             raise SignalSchemaError(f"cannot deserialize signal schema: {schema}")
         signals: dict[str, DataType] = {}
+        custom_types: dict[str, Any] = schema.get("_custom_types", {})
         for signal, type_name in schema.items():
+            if signal == "_custom_types":
+                # This entry is used as a lookup for custom types,
+                # and is not an actual field.
+                continue
             try:
-                fr = NAMES_TO_TYPES.get(type_name)
-                if not fr:
-                    type_name, version = ModelStore.parse_name_version(type_name)
-                    fr = ModelStore.get(type_name, version)
-                    if not fr:
-                        raise SignalSchemaError(
-                            f"cannot deserialize '{signal}': "
-                            f"unknown type '{type_name}'."
-                            f" Try to add it with `ModelStore.register({type_name})`."
-                        )
+                fr = SignalSchema._resolve_type(type_name, custom_types)
+                if fr is None:
+                    # Skip if the type is not found, so all data can be displayed.
+                    warnings.warn(
+                        f"In signal '{signal}': "
+                        f"unknown type '{type_name}'."
+                        f" Try to add it with `ModelStore.register({type_name})`.",
+                        SignalSchemaWarning,
+                        stacklevel=2,
+                    )
+                    continue
             except TypeError as err:
                 raise SignalSchemaError(
                     f"cannot deserialize '{signal}': {err}"

datachain/lib/text.py CHANGED Viewed

@@ -9,6 +9,7 @@ def convert_text(
     tokenizer: Optional[Callable] = None,
     tokenizer_kwargs: Optional[dict[str, Any]] = None,
     encoder: Optional[Callable] = None,
+    device: Optional[Union[str, torch.device]] = None,
 ) -> Union[str, list[str], torch.Tensor]:
     """
     Tokenize and otherwise transform text.
@@ -18,6 +19,7 @@ def convert_text(
         tokenizer (Callable): Tokenizer to use to tokenize objects.
         tokenizer_kwargs (dict): Additional kwargs to pass when calling tokenizer.
         encoder (Callable): Encode text using model.
+        device (str or torch.device): Device to use.
     """
     if not tokenizer:
         return text
@@ -32,6 +34,8 @@ def convert_text(
     tokens = res.input_ids if isinstance(tokenizer, PreTrainedTokenizerBase) else res
     tokens = torch.tensor(tokens)
+    if device:
+        tokens = tokens.to(device)
     if not encoder:
         return tokens

datachain/lib/udf.py CHANGED Viewed

@@ -242,26 +242,8 @@ class UDFBase(AbstractUDF):
         if not self.is_output_batched:
             result_objs = [result_objs]
-        if len(self.output.values) > 1:
-            res = []
-            for tuple_ in result_objs:
-                flat = []
-                for obj in tuple_:
-                    if isinstance(obj, BaseModel):
-                        flat.extend(flatten(obj))
-                    else:
-                        flat.append(obj)
-                res.append(tuple(flat))
-        else:
-            # Generator expression is required, otherwise the value will be materialized
-            res = (
-                flatten(obj)
-                if isinstance(obj, BaseModel)
-                else obj
-                if isinstance(obj, tuple)
-                else (obj,)
-                for obj in result_objs
-            )
+        # Generator expression is required, otherwise the value will be materialized
+        res = (self._flatten_row(row) for row in result_objs)
         if not self.is_output_batched:
             res = list(res)
@@ -282,6 +264,18 @@ class UDFBase(AbstractUDF):
         return res
+    def _flatten_row(self, row):
+        if len(self.output.values) > 1 and not isinstance(row, BaseModel):
+            flat = []
+            for obj in row:
+                flat.extend(self._obj_to_list(obj))
+            return tuple(flat)
+        return row if isinstance(row, tuple) else tuple(self._obj_to_list(row))
+    @staticmethod
+    def _obj_to_list(obj):
+        return flatten(obj) if isinstance(obj, BaseModel) else [obj]
     def _parse_rows(self, rows, cache, download_cb):
         objs = []
         for row in rows:

datachain/query/dataset.py CHANGED Viewed

@@ -24,6 +24,7 @@ from typing import (
 )
 import attrs
+import psutil
 import sqlalchemy
 import sqlalchemy as sa
 from attrs import frozen
@@ -383,7 +384,7 @@ def process_udf_outputs(
     udf_table: "Table",
     udf_results: Iterator[Iterable["UDFResult"]],
     udf: UDFBase,
-    batch_size=INSERT_BATCH_SIZE,
+    batch_size: int = INSERT_BATCH_SIZE,
     cb: Callback = DEFAULT_CALLBACK,
 ) -> None:
     rows: list[UDFResult] = []
@@ -396,7 +397,9 @@ def process_udf_outputs(
         for row in udf_output:
             cb.relative_update()
             rows.append(adjust_outputs(warehouse, row, udf_col_types))
-            if len(rows) >= batch_size:
+            if len(rows) >= batch_size or (
+                len(rows) % 10 == 0 and psutil.virtual_memory().percent > 80
+            ):
                 for row_chunk in batched(rows, batch_size):
                     warehouse.insert_rows(udf_table, row_chunk)
                 rows.clear()
@@ -1775,6 +1778,10 @@ def query_wrapper(dataset_query: DatasetQuery) -> DatasetQuery:
     save = bool(os.getenv("DATACHAIN_QUERY_SAVE"))
     save_as = os.getenv("DATACHAIN_QUERY_SAVE_AS")
+    is_session_temp_dataset = dataset_query.name and dataset_query.name.startswith(
+        dataset_query.session.get_temp_prefix()
+    )
     if save_as:
         if dataset_query.attached:
             dataset_name = dataset_query.name
@@ -1801,7 +1808,7 @@ def query_wrapper(dataset_query: DatasetQuery) -> DatasetQuery:
             )
         else:
             dataset_query = dataset_query.save(save_as)
-    elif save and not dataset_query.attached:
+    elif save and (is_session_temp_dataset or not dataset_query.attached):
         name = catalog.generate_query_dataset_name()
         dataset_query = dataset_query.save(name)

datachain/query/session.py CHANGED Viewed

@@ -74,11 +74,13 @@ class Session:
             self.catalog.id_generator.close_on_exit()
     def generate_temp_dataset_name(self) -> str:
-        tmp_table_uid = uuid4().hex[: self.TEMP_TABLE_UUID_LEN]
-        return f"{self.DATASET_PREFIX}{self.name}_{tmp_table_uid}"
+        return self.get_temp_prefix() + uuid4().hex[: self.TEMP_TABLE_UUID_LEN]
+    def get_temp_prefix(self) -> str:
+        return f"{self.DATASET_PREFIX}{self.name}_"
     def _cleanup_temp_datasets(self) -> None:
-        prefix = f"{self.DATASET_PREFIX}{self.name}"
+        prefix = self.get_temp_prefix()
         try:
             for dataset in list(self.catalog.metastore.list_datasets_by_prefix(prefix)):
                 self.catalog.remove_dataset(dataset.name, force=True)

{datachain-0.3.6.dist-info → datachain-0.3.8.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: datachain
-Version: 0.3.6
+Version: 0.3.8
 Summary: Wrangle unstructured AI data at scale
 Author-email: Dmitry Petrov <support@dvc.org>
 License: Apache-2.0
@@ -41,10 +41,11 @@ Requires-Dist: jmespath >=1.0
 Requires-Dist: datamodel-code-generator >=0.25
 Requires-Dist: Pillow <11,>=10.0.0
 Requires-Dist: msgpack <2,>=1.0.4
+Requires-Dist: psutil
 Requires-Dist: numpy <2,>=1 ; sys_platform == "win32"
 Provides-Extra: dev
 Requires-Dist: datachain[docs,tests] ; extra == 'dev'
-Requires-Dist: mypy ==1.11.1 ; extra == 'dev'
+Requires-Dist: mypy ==1.11.2 ; extra == 'dev'
 Requires-Dist: types-python-dateutil ; extra == 'dev'
 Requires-Dist: types-pytz ; extra == 'dev'
 Requires-Dist: types-PyYAML ; extra == 'dev'
@@ -64,11 +65,14 @@ Requires-Dist: accelerate ; extra == 'examples'
 Requires-Dist: unstructured[pdf] ; extra == 'examples'
 Requires-Dist: pdfplumber ==0.11.4 ; extra == 'examples'
 Requires-Dist: huggingface-hub[hf_transfer] ; extra == 'examples'
+Provides-Extra: hf
+Requires-Dist: numba >=0.60.0 ; extra == 'hf'
+Requires-Dist: datasets[audio,vision] ; extra == 'hf'
 Provides-Extra: remote
 Requires-Dist: lz4 ; extra == 'remote'
 Requires-Dist: requests >=2.22.0 ; extra == 'remote'
 Provides-Extra: tests
-Requires-Dist: datachain[remote,torch,vector] ; extra == 'tests'
+Requires-Dist: datachain[hf,remote,torch,vector] ; extra == 'tests'
 Requires-Dist: pytest <9,>=8 ; extra == 'tests'
 Requires-Dist: pytest-sugar >=0.9.6 ; extra == 'tests'
 Requires-Dist: pytest-cov >=4.1.0 ; extra == 'tests'
@@ -83,6 +87,7 @@ Requires-Dist: hypothesis ; extra == 'tests'
 Requires-Dist: open-clip-torch ; extra == 'tests'
 Requires-Dist: aiotools >=1.7.0 ; extra == 'tests'
 Requires-Dist: requests-mock ; extra == 'tests'
+Requires-Dist: scipy ; extra == 'tests'
 Provides-Extra: torch
 Requires-Dist: torch >=2.1.0 ; extra == 'torch'
 Requires-Dist: torchvision ; extra == 'torch'

datachain 0.3.6__py3-none-any.whl → 0.3.8__py3-none-any.whl

Potentially problematic release.

datachain 0.3.6__py3-none-any.whl → 0.3.8__py3-none-any.whl