datachain 0.2.8__py3-none-any.whl → 0.2.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: the registry flags this version of datachain as possibly problematic.
- datachain/__init__.py +17 -8
- datachain/catalog/catalog.py +5 -5
- datachain/cli.py +0 -2
- datachain/data_storage/schema.py +5 -5
- datachain/data_storage/sqlite.py +1 -1
- datachain/data_storage/warehouse.py +7 -7
- datachain/lib/arrow.py +25 -8
- datachain/lib/clip.py +6 -11
- datachain/lib/convert/__init__.py +0 -0
- datachain/lib/convert/flatten.py +67 -0
- datachain/lib/convert/type_converter.py +96 -0
- datachain/lib/convert/unflatten.py +69 -0
- datachain/lib/convert/values_to_tuples.py +85 -0
- datachain/lib/data_model.py +74 -0
- datachain/lib/dc.py +192 -167
- datachain/lib/feature_registry.py +36 -10
- datachain/lib/file.py +41 -41
- datachain/lib/gpt4_vision.py +1 -9
- datachain/lib/hf_image_to_text.py +9 -17
- datachain/lib/hf_pipeline.py +4 -12
- datachain/lib/image.py +2 -18
- datachain/lib/image_transform.py +0 -1
- datachain/lib/iptc_exif_xmp.py +8 -15
- datachain/lib/meta_formats.py +1 -5
- datachain/lib/model_store.py +77 -0
- datachain/lib/pytorch.py +9 -21
- datachain/lib/signal_schema.py +120 -58
- datachain/lib/text.py +5 -16
- datachain/lib/udf.py +114 -30
- datachain/lib/udf_signature.py +5 -5
- datachain/lib/webdataset.py +3 -4
- datachain/lib/webdataset_laion.py +2 -3
- datachain/node.py +4 -4
- datachain/query/batch.py +1 -1
- datachain/query/dataset.py +40 -60
- datachain/query/dispatch.py +28 -17
- datachain/query/udf.py +46 -26
- datachain/remote/studio.py +1 -9
- datachain/torch/__init__.py +21 -0
- {datachain-0.2.8.dist-info → datachain-0.2.10.dist-info}/METADATA +13 -12
- {datachain-0.2.8.dist-info → datachain-0.2.10.dist-info}/RECORD +45 -42
- datachain/image/__init__.py +0 -3
- datachain/lib/cached_stream.py +0 -38
- datachain/lib/claude.py +0 -69
- datachain/lib/feature.py +0 -412
- datachain/lib/feature_utils.py +0 -154
- {datachain-0.2.8.dist-info → datachain-0.2.10.dist-info}/LICENSE +0 -0
- {datachain-0.2.8.dist-info → datachain-0.2.10.dist-info}/WHEEL +0 -0
- {datachain-0.2.8.dist-info → datachain-0.2.10.dist-info}/entry_points.txt +0 -0
- {datachain-0.2.8.dist-info → datachain-0.2.10.dist-info}/top_level.txt +0 -0
datachain/node.py
CHANGED
@@ -46,8 +46,8 @@ class DirTypeGroup:
 
 @attrs.define
 class Node:
-    id: int = 0
-    random: int = -1
+    sys__id: int = 0
+    sys__rand: int = -1
     vtype: str = ""
     dir_type: Optional[int] = None
     parent: str = ""
@@ -127,11 +127,11 @@ class Node:
 
     @classmethod
     def from_dir(cls, parent, name, **kwargs) -> "Node":
-        return cls(id=-1, dir_type=DirType.DIR, parent=parent, name=name, **kwargs)
+        return cls(sys__id=-1, dir_type=DirType.DIR, parent=parent, name=name, **kwargs)
 
     @classmethod
     def root(cls) -> "Node":
-        return cls(id=-1, dir_type=DirType.DIR)
+        return cls(sys__id=-1, dir_type=DirType.DIR)
 
 
 @attrs.define
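The hunks above move the row id and the sampling value under the sys__ prefix, and from_dir()/root() now seed sys__id=-1 for synthesized directory nodes. A minimal sketch of the renamed fields in use (the print call and field values are illustrative, not from the diff):

    import attrs
    from typing import Optional

    @attrs.define
    class Node:
        sys__id: int = 0
        sys__rand: int = -1
        vtype: str = ""
        dir_type: Optional[int] = None
        parent: str = ""

    print(Node(sys__id=-1, parent="animals"))  # dir-style node, as in from_dir()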
datachain/query/batch.py
CHANGED
@@ -104,7 +104,7 @@ class Partition(BatchingStrategy):
         with contextlib.closing(
             execute(
                 query,
-                order_by=(PARTITION_COLUMN_ID, "id", *query._order_by_clauses),
+                order_by=(PARTITION_COLUMN_ID, "sys__id", *query._order_by_clauses),
                 limit=query._limit,
             )
         ) as rows:
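The row-id tie-breaker was already in the ordering tuple; this hunk only renames it to the sys__ prefix. For reference, a minimal SQLAlchemy sketch of the ORDER BY this produces (table and column names are illustrative), showing why the tie-breaker keeps batches deterministic within a partition:

    import sqlalchemy as sa

    rows = sa.table("rows", sa.column("partition_id"), sa.column("sys__id"))
    # partition_id alone does not define an order inside a partition;
    # sys__id makes the row order, and hence batch boundaries, stable
    print(sa.select(rows).order_by(rows.c.partition_id, rows.c.sys__id))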
datachain/query/dataset.py
CHANGED
@@ -31,6 +31,7 @@ import sqlalchemy
 from attrs import frozen
 from dill import dumps, source
 from fsspec.callbacks import DEFAULT_CALLBACK, Callback, TqdmCallback
+from pydantic import BaseModel
 from sqlalchemy import Column
 from sqlalchemy.sql import func as f
 from sqlalchemy.sql.elements import ColumnClause, ColumnElement
@@ -57,7 +58,6 @@ from datachain.sql.functions import rand
 from datachain.storage import Storage, StorageURI
 from datachain.utils import batched, determine_processes, inside_notebook
 
-from .batch import RowBatch
 from .metrics import metrics
 from .schema import C, UDFParamSpec, normalize_param
 from .session import Session
@@ -257,7 +257,7 @@ class DatasetDiffOperation(Step):
     """
 
     def apply(self, query_generator, temp_tables: list[str]):
-        source_query = query_generator.exclude(("id",))
+        source_query = query_generator.exclude(("sys__id",))
         target_query = self.dq.apply_steps().select()
         temp_tables.extend(self.dq.temp_table_names)
 
@@ -427,22 +427,6 @@ def get_generated_callback(is_generator: bool = False) -> Callback:
     return DEFAULT_CALLBACK
 
 
-def run_udf(
-    udf,
-    udf_inputs,
-    catalog,
-    is_generator,
-    cache,
-    download_cb: Callback = DEFAULT_CALLBACK,
-    processed_cb: Callback = DEFAULT_CALLBACK,
-) -> Iterator[Iterable["UDFResult"]]:
-    for batch in udf_inputs:
-        n_rows = len(batch.rows) if isinstance(batch, RowBatch) else 1
-        output = udf(catalog, batch, is_generator, cache, cb=download_cb)
-        processed_cb.relative_update(n_rows)
-        yield output
-
-
 @frozen
 class UDF(Step, ABC):
     udf: UDFType
@@ -548,9 +532,6 @@ class UDF(Step, ABC):
         else:
             udf = self.udf
 
-        if hasattr(udf.func, "setup") and callable(udf.func.setup):
-            udf.func.setup()
-
         warehouse = self.catalog.warehouse
 
         with contextlib.closing(
@@ -560,8 +541,7 @@ class UDF(Step, ABC):
             processed_cb = get_processed_callback()
            generated_cb = get_generated_callback(self.is_generator)
             try:
-                udf_results = run_udf(
-                    udf,
+                udf_results = udf.run(
                     udf_inputs,
                     self.catalog,
                     self.is_generator,
@@ -583,9 +563,6 @@ class UDF(Step, ABC):
 
             warehouse.insert_rows_done(udf_table)
 
-            if hasattr(udf.func, "teardown") and callable(udf.func.teardown):
-                udf.func.teardown()
-
         except QueryScriptCancelError:
             self.catalog.warehouse.close()
             sys.exit(QUERY_SCRIPT_CANCELED_EXIT_CODE)
@@ -663,7 +640,7 @@ class UDF(Step, ABC):
 
         # fill table with partitions
         cols = [
-            query.selected_columns.id,
+            query.selected_columns.sys__id,
             f.dense_rank().over(order_by=list_partition_by).label(PARTITION_COLUMN_ID),
         ]
         self.catalog.warehouse.db.execute(
@@ -697,7 +674,7 @@ class UDF(Step, ABC):
         subq = query.subquery()
         query = (
             sqlalchemy.select(*subq.c)
-            .outerjoin(partition_tbl, partition_tbl.c.id == subq.c.id)
+            .outerjoin(partition_tbl, partition_tbl.c.sys__id == subq.c.sys__id)
             .add_columns(*partition_columns())
         )
 
@@ -729,18 +706,18 @@ class UDFSignal(UDF):
         columns = [
             sqlalchemy.Column(c.name, c.type)
             for c in query.selected_columns
-            if c.name != "id"
+            if c.name != "sys__id"
         ]
         table = self.catalog.warehouse.create_udf_table(self.udf_table_name(), columns)
         select_q = query.with_only_columns(
-            *[c for c in query.selected_columns if c.name != "id"]
+            *[c for c in query.selected_columns if c.name != "sys__id"]
         )
 
         # if there is order by clause we need row_number to preserve order
         # if there is no order by clause we still need row_number to generate
         # unique ids as uniqueness is important for this table
         select_q = select_q.add_columns(
-            f.row_number().over(order_by=select_q._order_by_clauses).label("id")
+            f.row_number().over(order_by=select_q._order_by_clauses).label("sys__id")
         )
 
         self.catalog.warehouse.db.execute(
@@ -756,7 +733,7 @@ class UDFSignal(UDF):
         if query._order_by_clauses:
             # we are adding ordering only if it's explicitly added by user in
             # query part before adding signals
-            q = q.order_by(table.c.id)
+            q = q.order_by(table.c.sys__id)
         return q, [table]
 
     def create_result_query(
@@ -766,7 +743,7 @@ class UDFSignal(UDF):
         original_cols = [c for c in subq.c if c.name not in partition_col_names]
 
         # new signal columns that are added to udf_table
-        signal_cols = [c for c in udf_table.c if c.name != "id"]
+        signal_cols = [c for c in udf_table.c if c.name != "sys__id"]
         signal_name_cols = {c.name: c for c in signal_cols}
         cols = signal_cols
 
@@ -786,7 +763,7 @@ class UDFSignal(UDF):
             res = (
                 sqlalchemy.select(*cols1)
                 .select_from(subq)
-                .outerjoin(udf_table, udf_table.c.id == subq.c.id)
+                .outerjoin(udf_table, udf_table.c.sys__id == subq.c.sys__id)
                 .add_columns(*cols2)
             )
         else:
@@ -795,7 +772,7 @@ class UDFSignal(UDF):
             if query._order_by_clauses:
                 # if ordering is used in query part before adding signals, we
                 # will have it as order by id from select from pre-created udf table
-                res = res.order_by(subq.c.id)
+                res = res.order_by(subq.c.sys__id)
 
         if self.partition_by is not None:
             subquery = res.subquery()
@@ -833,7 +810,7 @@ class RowGenerator(UDF):
         # we get the same rows as we got as inputs of UDF since selecting
         # without ordering can be non deterministic in some databases
         c = query.selected_columns
-        query = query.order_by(c.id)
+        query = query.order_by(c.sys__id)
 
         udf_table_query = udf_table.select().subquery()
         udf_table_cols: list[sqlalchemy.Label[Any]] = [
@@ -1025,7 +1002,7 @@ class SQLJoin(Step):
         q1_column_names = {c.name for c in q1_columns}
         q2_columns = [
             c
-            if c.name not in q1_column_names and c.name != "id"
+            if c.name not in q1_column_names and c.name != "sys__id"
             else c.label(self.rname.format(name=c.name))
             for c in q2.c
         ]
@@ -1165,8 +1142,8 @@ class DatasetQuery:
         self.version = version or ds.latest_version
         self.feature_schema = ds.get_version(self.version).feature_schema
         self.column_types = copy(ds.schema)
-        if "id" in self.column_types:
-            self.column_types.pop("id")
+        if "sys__id" in self.column_types:
+            self.column_types.pop("sys__id")
         self.starting_step = QueryStep(self.catalog, name, self.version)
         # attaching to specific dataset
         self.name = name
@@ -1239,7 +1216,7 @@ class DatasetQuery:
             query.steps = self._chunk_limit(query.steps, index, total)
 
         # Prepend the chunk filter to the step chain.
-        query = query.filter(C.random % total == index)
+        query = query.filter(C.sys__rand % total == index)
         query.steps = query.steps[-1:] + query.steps[:-1]
 
         result = query.starting_step.apply()
@@ -1366,10 +1343,8 @@ class DatasetQuery:
         finally:
             self.cleanup()
 
-    def to_records(self) -> list[dict]:
-
-        cols = result.columns
-        return [dict(zip(cols, row)) for row in result]
+    def to_records(self) -> list[dict[str, Any]]:
+        return self.results(lambda cols, row: dict(zip(cols, row)))
 
     def to_pandas(self) -> "pd.DataFrame":
         records = self.to_records()
@@ -1379,7 +1354,7 @@ class DatasetQuery:
 
     def shuffle(self) -> "Self":
         # ToDo: implement shaffle based on seed and/or generating random column
-        return self.order_by(C.random)
+        return self.order_by(C.sys__rand)
 
     def sample(self, n) -> "Self":
         """
@@ -1508,30 +1483,35 @@ class DatasetQuery:
         query.steps.append(SQLOffset(offset))
         return query
 
+    def as_scalar(self) -> Any:
+        with self.as_iterable() as rows:
+            row = next(iter(rows))
+        return row[0]
+
     def count(self) -> int:
         query = self.clone()
         query.steps.append(SQLCount())
-        return query.results()[0][0]
+        return query.as_scalar()
 
-    def sum(self, col: ColumnElement):
+    def sum(self, col: ColumnElement) -> int:
         query = self.clone()
         query.steps.append(SQLSelect((f.sum(col),)))
-        return query.results()[0][0]
+        return query.as_scalar()
 
-    def avg(self, col: ColumnElement):
+    def avg(self, col: ColumnElement) -> int:
         query = self.clone()
         query.steps.append(SQLSelect((f.avg(col),)))
-        return query.results()[0][0]
+        return query.as_scalar()
 
-    def min(self, col: ColumnElement):
+    def min(self, col: ColumnElement) -> int:
         query = self.clone()
         query.steps.append(SQLSelect((f.min(col),)))
-        return query.results()[0][0]
+        return query.as_scalar()
 
-    def max(self, col: ColumnElement):
+    def max(self, col: ColumnElement) -> int:
         query = self.clone()
         query.steps.append(SQLSelect((f.max(col),)))
-        return query.results()[0][0]
+        return query.as_scalar()
 
     @detach
     def group_by(self, *cols: ColumnElement) -> "Self":
@@ -1723,7 +1703,7 @@ class DatasetQuery:
             c if isinstance(c, Column) else Column(c.name, c.type)
             for c in query.columns
         ]
-        if not [c for c in columns if c.name != "id"]:
+        if not [c for c in columns if c.name != "sys__id"]:
            raise RuntimeError(
                 "No columns to save in the query. "
                 "Ensure at least one column (other than 'id') is selected."
@@ -1742,11 +1722,11 @@ class DatasetQuery:
 
         # Exclude the id column and let the db create it to avoid unique
         # constraint violations.
-        q = query.exclude(("id",))
+        q = query.exclude(("sys__id",))
         if q._order_by_clauses:
             # ensuring we have id sorted by order by clause if it exists in a query
             q = q.add_columns(
-                f.row_number().over(order_by=q._order_by_clauses).label("id")
+                f.row_number().over(order_by=q._order_by_clauses).label("sys__id")
             )
 
         cols = tuple(c.name for c in q.columns)
@@ -1876,9 +1856,9 @@ def _random_string(length: int) -> str:
 
 
 def _feature_predicate(obj):
-
-
-
+    return (
+        inspect.isclass(obj) and source.isfrommain(obj) and issubclass(obj, BaseModel)
+    )
 
 
 def _imports(obj):
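Taken together, the aggregate changes deduplicate the scalar-extraction tail of count/sum/avg/min/max into the new as_scalar() helper. A hedged usage sketch of the resulting API (the dataset name and the size column are hypothetical, not taken from the diff):

    from datachain.query.dataset import DatasetQuery
    from datachain.query.schema import C

    dq = DatasetQuery(name="animals")  # hypothetical saved dataset
    print(dq.count())      # appends SQLCount, then reads one row via as_scalar()
    print(dq.max(C.size))  # every aggregate now ends in the same as_scalar() call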
datachain/query/dispatch.py
CHANGED
@@ -16,7 +16,6 @@ from multiprocess import get_context
 
 from datachain.catalog import Catalog
 from datachain.catalog.loader import get_distributed_class
-from datachain.query.batch import RowBatch
 from datachain.query.dataset import (
     get_download_callback,
     get_generated_callback,
@@ -355,6 +354,15 @@ class WorkerCallback(Callback):
         put_into_queue(self.queue, {"status": NOTIFY_STATUS, "downloaded": inc})
 
 
+class ProcessedCallback(Callback):
+    def __init__(self):
+        self.processed_rows: Optional[int] = None
+        super().__init__()
+
+    def relative_update(self, inc: int = 1) -> None:
+        self.processed_rows = inc
+
+
 @attrs.define
 class UDFWorker:
     catalog: Catalog
@@ -370,25 +378,28 @@ class UDFWorker:
         return WorkerCallback(self.done_queue)
 
     def run(self) -> None:
-
-
-
-
-
-
-
-
-
-
-            )
+        processed_cb = ProcessedCallback()
+        udf_results = self.udf.run(
+            self.get_inputs(),
+            self.catalog,
+            self.is_generator,
+            self.cache,
+            download_cb=self.cb,
+            processed_cb=processed_cb,
+        )
+        for udf_output in udf_results:
             if isinstance(udf_output, GeneratorType):
                 udf_output = list(udf_output)  # can not pickle generator
             put_into_queue(
                 self.done_queue,
-                {"status": OK_STATUS, "result": udf_output},
+                {
+                    "status": OK_STATUS,
+                    "result": udf_output,
+                    "processed": processed_cb.processed_rows,
+                },
             )
-
-        if hasattr(self.udf.func, "teardown") and callable(self.udf.func.teardown):
-            self.udf.func.teardown()
-
         put_into_queue(self.done_queue, {"status": FINISHED_STATUS})
+
+    def get_inputs(self):
+        while (batch := get_from_queue(self.task_queue)) != STOP_SIGNAL:
+            yield batch
datachain/query/udf.py
CHANGED
@@ -1,5 +1,5 @@
 import typing
-from collections.abc import Iterable, Mapping, Sequence
+from collections.abc import Iterable, Iterator, Mapping, Sequence
 from dataclasses import dataclass
 from functools import WRAPPER_ASSIGNMENTS
 from inspect import isclass
@@ -14,7 +14,6 @@ from typing import (
 from fsspec.callbacks import DEFAULT_CALLBACK, Callback
 
 from datachain.dataset import RowDict
-from datachain.lib.utils import AbstractUDF
 
 from .batch import Batch, BatchingStrategy, NoBatching, Partition, RowBatch
 from .schema import (
@@ -100,15 +99,28 @@ class UDFBase:
 
     def __init__(
         self,
-        func: Callable,
         properties: UDFProperties,
     ):
-        self.func = func
         self.properties = properties
         self.signal_names = properties.signal_names()
         self.output = properties.output
 
-    def __call__(
+    def run(
+        self,
+        udf_inputs: "Iterable[BatchingResult]",
+        catalog: "Catalog",
+        is_generator: bool,
+        cache: bool,
+        download_cb: Callback = DEFAULT_CALLBACK,
+        processed_cb: Callback = DEFAULT_CALLBACK,
+    ) -> Iterator[Iterable["UDFResult"]]:
+        for batch in udf_inputs:
+            n_rows = len(batch.rows) if isinstance(batch, RowBatch) else 1
+            output = self.run_once(catalog, batch, is_generator, cache, cb=download_cb)
+            processed_cb.relative_update(n_rows)
+            yield output
+
+    def run_once(
         self,
         catalog: "Catalog",
         arg: "BatchingResult",
@@ -116,24 +128,7 @@ class UDFBase:
         cache: bool = False,
         cb: Callback = DEFAULT_CALLBACK,
     ) -> Iterable[UDFResult]:
-        if isinstance(self.func, AbstractUDF):
-            self.func._catalog = catalog  # type: ignore[unreachable]
-
-        if isinstance(arg, RowBatch):
-            udf_inputs = [
-                self.bind_parameters(catalog, row, cache=cache, cb=cb)
-                for row in arg.rows
-            ]
-            udf_outputs = self.func(udf_inputs)
-            return self._process_results(arg.rows, udf_outputs, is_generator)
-        if isinstance(arg, RowDict):
-            udf_inputs = self.bind_parameters(catalog, arg, cache=cache, cb=cb)
-            udf_outputs = self.func(*udf_inputs)
-            if not is_generator:
-                # udf_outputs is generator already if is_generator=True
-                udf_outputs = [udf_outputs]
-            return self._process_results([arg], udf_outputs, is_generator)
-        raise ValueError(f"Unexpected UDF argument: {arg}")
+        raise NotImplementedError
 
     def bind_parameters(self, catalog: "Catalog", row: "RowDict", **kwargs) -> list:
         return [p.get_value(catalog, row, **kwargs) for p in self.properties.params]
@@ -152,9 +147,9 @@ class UDFBase:
         return (dict(zip(self.signal_names, row)) for row in results)
 
         # outputting signals
-        row_ids = [row["id"] for row in rows]
+        row_ids = [row["sys__id"] for row in rows]
         return [
-
+            {"sys__id": row_id} | dict(zip(self.signal_names, signals))
             for row_id, signals in zip(row_ids, results)
             if signals is not None  # skip rows with no output
         ]
@@ -194,12 +189,37 @@ class UDFWrapper(UDFBase):
         func: Callable,
         properties: UDFProperties,
     ):
-        super().__init__(func, properties)
+        self.func = func
+        super().__init__(properties)
         # This emulates the behavior of functools.wraps for a class decorator
         for attr in WRAPPER_ASSIGNMENTS:
            if hasattr(func, attr):
                 setattr(self, attr, getattr(func, attr))
 
+    def run_once(
+        self,
+        catalog: "Catalog",
+        arg: "BatchingResult",
+        is_generator: bool = False,
+        cache: bool = False,
+        cb: Callback = DEFAULT_CALLBACK,
+    ) -> Iterable[UDFResult]:
+        if isinstance(arg, RowBatch):
+            udf_inputs = [
+                self.bind_parameters(catalog, row, cache=cache, cb=cb)
+                for row in arg.rows
+            ]
+            udf_outputs = self.func(udf_inputs)
+            return self._process_results(arg.rows, udf_outputs, is_generator)
+        if isinstance(arg, RowDict):
+            udf_inputs = self.bind_parameters(catalog, arg, cache=cache, cb=cb)
+            udf_outputs = self.func(*udf_inputs)
+            if not is_generator:
+                # udf_outputs is generator already if is_generator=True
+                udf_outputs = [udf_outputs]
+            return self._process_results([arg], udf_outputs, is_generator)
+        raise ValueError(f"Unexpected UDF argument: {arg}")
+
     # This emulates the behavior of functools.wraps for a class decorator
     def __repr__(self):
         return repr(self.func)
datachain/remote/studio.py
CHANGED
@@ -190,19 +190,11 @@ class StudioClient:
     def dataset_rows_chunk(
         self, name: str, version: int, offset: int
     ) -> Response[DatasetRowsData]:
-        def _parse_row(row):
-            row["id"] = int(row["id"])
-            return row
-
         req_data = {"dataset_name": name, "dataset_version": version}
-        response = self._send_request_msgpack(
+        return self._send_request_msgpack(
             "dataset-rows",
             {**req_data, "offset": offset, "limit": DATASET_ROWS_CHUNK_SIZE},
         )
-        if response.ok:
-            response.data = [_parse_row(r) for r in response.data]
-
-        return response
 
     def dataset_stats(self, name: str, version: int) -> Response[DatasetStatsData]:
         response = self._send_request(
datachain/torch/__init__.py
ADDED

@@ -0,0 +1,21 @@
+try:
+    from datachain.lib.clip import similarity_scores as clip_similarity_scores
+    from datachain.lib.image import convert_image, convert_images
+    from datachain.lib.pytorch import PytorchDataset, label_to_int
+    from datachain.lib.text import convert_text
+
+except ImportError as exc:
+    raise ImportError(
+        "Missing dependencies for torch:\n"
+        "To install run:\n\n"
+        "  pip install 'datachain[torch]'\n"
+    ) from exc
+
+__all__ = [
+    "PytorchDataset",
+    "clip_similarity_scores",
+    "convert_image",
+    "convert_images",
+    "convert_text",
+    "label_to_int",
+]
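This new module gives the torch-dependent helpers a single import point behind one ImportError guard. Assuming the extra is installed, usage reduces to one import (names taken from the __all__ list above):

    # requires: pip install 'datachain[torch]'
    from datachain.torch import PytorchDataset, clip_similarity_scores, convert_text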
{datachain-0.2.8.dist-info → datachain-0.2.10.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: datachain
-Version: 0.2.8
+Version: 0.2.10
 Summary: Wrangle unstructured AI data at scale
 Author-email: Dmitry Petrov <support@dvc.org>
 License: Apache-2.0
@@ -38,12 +38,8 @@ Requires-Dist: ujson >=5.9.0
 Requires-Dist: pydantic <3,>=2
 Requires-Dist: jmespath >=1.0
 Requires-Dist: datamodel-code-generator >=0.25
+Requires-Dist: Pillow <11,>=10.0.0
 Requires-Dist: numpy <2,>=1 ; sys_platform == "win32"
-Provides-Extra: cv
-Requires-Dist: Pillow <11,>=10.0.0 ; extra == 'cv'
-Requires-Dist: torch >=2.1.0 ; extra == 'cv'
-Requires-Dist: torchvision ; extra == 'cv'
-Requires-Dist: transformers >=4.36.0 ; extra == 'cv'
 Provides-Extra: dev
 Requires-Dist: datachain[docs,tests] ; extra == 'dev'
 Requires-Dist: mypy ==1.10.1 ; extra == 'dev'
@@ -63,7 +59,7 @@ Requires-Dist: lz4 ; extra == 'remote'
 Requires-Dist: msgpack <2,>=1.0.4 ; extra == 'remote'
 Requires-Dist: requests >=2.22.0 ; extra == 'remote'
 Provides-Extra: tests
-Requires-Dist: datachain[cv,remote,vector] ; extra == 'tests'
+Requires-Dist: datachain[remote,torch,vector] ; extra == 'tests'
 Requires-Dist: pytest <9,>=8 ; extra == 'tests'
 Requires-Dist: pytest-sugar >=0.9.6 ; extra == 'tests'
 Requires-Dist: pytest-cov >=4.1.0 ; extra == 'tests'
@@ -78,6 +74,10 @@ Requires-Dist: hypothesis ; extra == 'tests'
 Requires-Dist: open-clip-torch ; extra == 'tests'
 Requires-Dist: aiotools >=1.7.0 ; extra == 'tests'
 Requires-Dist: requests-mock ; extra == 'tests'
+Provides-Extra: torch
+Requires-Dist: torch >=2.1.0 ; extra == 'torch'
+Requires-Dist: torchvision ; extra == 'torch'
+Requires-Dist: transformers >=4.36.0 ; extra == 'torch'
 Provides-Extra: vector
 Requires-Dist: usearch ; extra == 'vector'
 
@@ -89,11 +89,11 @@ Requires-Dist: usearch ; extra == 'vector'
 .. |Python Version| image:: https://img.shields.io/pypi/pyversions/datachain
    :target: https://pypi.org/project/datachain
    :alt: Python Version
-.. |Codecov| image:: https://codecov.io/gh/iterative/
-   :target: https://
+.. |Codecov| image:: https://codecov.io/gh/iterative/datachain/graph/badge.svg?token=byliXGGyGB
+   :target: https://codecov.io/gh/iterative/datachain
    :alt: Codecov
-.. |Tests| image:: https://github.com/iterative/
-   :target: https://github.com/iterative/
+.. |Tests| image:: https://github.com/iterative/datachain/workflows/Tests/badge.svg
+   :target: https://github.com/iterative/datachain/actions?workflow=Tests
    :alt: Tests
 
 AI 🔗 DataChain
@@ -397,7 +397,8 @@ Chain results can be exported or passed directly to Pytorch dataloader. For exam
 Tutorials
 ------------------
 
-* `
+* `Computer Vision <examples/computer_vision/fashion_product_images/1-quick-start.ipynb>`_ (try in `Colab <https://colab.research.google.com/github/iterative/datachain/blob/main/examples/computer_vision/fashion_product_images/1-quick-start.ipynb>`__)
+* `Multimodal <examples/multimodal/clip_fine_tuning.ipynb>`_ (try in `Colab <https://colab.research.google.com/github/iterative/datachain/blob/main/examples/multimodal/clip_fine_tuning.ipynb>`__)
 
 Contributions
 --------------------