datachain 0.14.5__py3-none-any.whl → 0.16.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datachain/__init__.py +4 -0
- datachain/catalog/catalog.py +19 -9
- datachain/catalog/loader.py +11 -7
- datachain/cli/__init__.py +1 -1
- datachain/cli/commands/datasets.py +3 -3
- datachain/cli/commands/show.py +2 -2
- datachain/cli/parser/__init__.py +2 -2
- datachain/data_storage/metastore.py +5 -5
- datachain/dataset.py +8 -8
- datachain/lib/convert/values_to_tuples.py +23 -14
- datachain/lib/dataset_info.py +18 -0
- datachain/lib/dc/__init__.py +4 -1
- datachain/lib/dc/database.py +151 -0
- datachain/lib/dc/datachain.py +19 -8
- datachain/lib/dc/datasets.py +52 -0
- datachain/lib/dc/pandas.py +8 -1
- datachain/lib/dc/records.py +12 -14
- datachain/lib/signal_schema.py +10 -1
- datachain/lib/udf.py +2 -1
- datachain/query/dataset.py +12 -14
- datachain/query/dispatch.py +7 -2
- datachain/query/schema.py +4 -1
- datachain/remote/studio.py +2 -2
- datachain/studio.py +2 -2
- {datachain-0.14.5.dist-info → datachain-0.16.0.dist-info}/METADATA +1 -1
- {datachain-0.14.5.dist-info → datachain-0.16.0.dist-info}/RECORD +30 -29
- {datachain-0.14.5.dist-info → datachain-0.16.0.dist-info}/WHEEL +0 -0
- {datachain-0.14.5.dist-info → datachain-0.16.0.dist-info}/entry_points.txt +0 -0
- {datachain-0.14.5.dist-info → datachain-0.16.0.dist-info}/licenses/LICENSE +0 -0
- {datachain-0.14.5.dist-info → datachain-0.16.0.dist-info}/top_level.txt +0 -0
datachain/__init__.py
CHANGED
@@ -5,8 +5,10 @@ from datachain.lib.dc import (
     DataChain,
     Sys,
     datasets,
+    delete_dataset,
     listings,
     read_csv,
+    read_database,
     read_dataset,
     read_hf,
     read_json,
@@ -61,11 +63,13 @@ __all__ = [
     "VideoFragment",
     "VideoFrame",
     "datasets",
+    "delete_dataset",
     "is_chain_type",
     "listings",
     "metrics",
     "param",
     "read_csv",
+    "read_database",
     "read_dataset",
     "read_hf",
     "read_json",
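A minimal sketch of the new top-level exports in use (hypothetical dataset name, assuming the default local session):

```python
import datachain as dc

# Build and save a small dataset, then remove it again via the new helper.
dc.read_values(num=[1, 2, 3]).save("numbers")
dc.delete_dataset("numbers", force=True)
```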
datachain/catalog/catalog.py
CHANGED
@@ -776,7 +776,7 @@ class Catalog:
         listing: Optional[bool] = False,
         uuid: Optional[str] = None,
         description: Optional[str] = None,
-
+        attrs: Optional[list[str]] = None,
     ) -> "DatasetRecord":
         """
         Creates new dataset of a specific version.
@@ -794,16 +794,16 @@
             dataset = self.get_dataset(name)
             default_version = dataset.next_version

-            if (description or
-                dataset.description != description or dataset.
+            if (description or attrs) and (
+                dataset.description != description or dataset.attrs != attrs
             ):
                 description = description or dataset.description
-
+                attrs = attrs or dataset.attrs

                 self.update_dataset(
                     dataset,
                     description=description,
-
+                    attrs=attrs,
                 )

         except DatasetNotFoundError:
@@ -817,7 +817,7 @@
                 schema=schema,
                 ignore_if_exists=True,
                 description=description,
-
+                attrs=attrs,
             )

             version = version or default_version
@@ -1299,7 +1299,17 @@
         name: str,
         version: Optional[int] = None,
         force: Optional[bool] = False,
+        studio: Optional[bool] = False,
     ):
+        from datachain.remote.studio import StudioClient
+
+        if studio:
+            client = StudioClient()
+            response = client.rm_dataset(name, version=version, force=force)
+            if not response.ok:
+                raise DataChainError(response.message)
+            return
+
         dataset = self.get_dataset(name)
         if not version and not force:
             raise ValueError(f"Missing dataset version from input for dataset {name}")
@@ -1324,15 +1334,15 @@
         name: str,
         new_name: Optional[str] = None,
         description: Optional[str] = None,
-
+        attrs: Optional[list[str]] = None,
     ) -> DatasetRecord:
         update_data = {}
         if new_name:
             update_data["name"] = new_name
         if description is not None:
             update_data["description"] = description
-        if
-        update_data["
+        if attrs is not None:
+            update_data["attrs"] = attrs  # type: ignore[assignment]

         dataset = self.get_dataset(name)
         return self.update_dataset(dataset, **update_data)
datachain/catalog/loader.py
CHANGED
@@ -1,4 +1,5 @@
 import os
+import sys
 from importlib import import_module
 from typing import TYPE_CHECKING, Any, Optional

@@ -15,6 +16,7 @@ METASTORE_ARG_PREFIX = "DATACHAIN_METASTORE_ARG_"
 WAREHOUSE_SERIALIZED = "DATACHAIN__WAREHOUSE"
 WAREHOUSE_IMPORT_PATH = "DATACHAIN_WAREHOUSE"
 WAREHOUSE_ARG_PREFIX = "DATACHAIN_WAREHOUSE_ARG_"
+DISTRIBUTED_IMPORT_PYTHONPATH = "DATACHAIN_DISTRIBUTED_PYTHONPATH"
 DISTRIBUTED_IMPORT_PATH = "DATACHAIN_DISTRIBUTED"

 IN_MEMORY_ERROR_MESSAGE = "In-memory is only supported on SQLite"
@@ -100,19 +102,21 @@ def get_warehouse(in_memory: bool = False) -> "AbstractWarehouse":
     return warehouse_class(**warehouse_args)


-def get_udf_distributor_class() -> type["AbstractUDFDistributor"]:
-    distributed_import_path
+def get_udf_distributor_class() -> Optional[type["AbstractUDFDistributor"]]:
+    if not (distributed_import_path := os.environ.get(DISTRIBUTED_IMPORT_PATH)):
+        return None

-    if not distributed_import_path:
-        raise RuntimeError(
-            f"{DISTRIBUTED_IMPORT_PATH} import path is required "
-            "for distributed UDF processing."
-        )
     # Distributed class paths are specified as (for example): module.classname
     if "." not in distributed_import_path:
         raise RuntimeError(
             f"Invalid {DISTRIBUTED_IMPORT_PATH} import path: {distributed_import_path}"
         )
+
+    # Optional: set the Python path to look for the module
+    distributed_import_pythonpath = os.environ.get(DISTRIBUTED_IMPORT_PYTHONPATH)
+    if distributed_import_pythonpath and distributed_import_pythonpath not in sys.path:
+        sys.path.insert(0, distributed_import_pythonpath)
+
     module_name, _, class_name = distributed_import_path.rpartition(".")
     distributed = import_module(module_name)
     return getattr(distributed, class_name)
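With the new `DATACHAIN_DISTRIBUTED_PYTHONPATH` variable, a distributor class living outside the installed packages can be picked up by prepending its directory to `sys.path`. A sketch, with hypothetical paths and class names:

```python
import os

# Hypothetical plugin directory and class; the loader only needs
# "module.ClassName" to be importable once the extra path is prepended.
os.environ["DATACHAIN_DISTRIBUTED_PYTHONPATH"] = "/opt/datachain_plugins"
os.environ["DATACHAIN_DISTRIBUTED"] = "my_distributor.MyUDFDistributor"

from datachain.catalog.loader import get_udf_distributor_class

# Returns the class, or None when DATACHAIN_DISTRIBUTED is not set.
distributor_cls = get_udf_distributor_class()
```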
datachain/cli/__init__.py
CHANGED
@@ -154,7 +154,7 @@ def edit_dataset(
     name: str,
     new_name: Optional[str] = None,
     description: Optional[str] = None,
-
+    attrs: Optional[list[str]] = None,
     studio: bool = False,
     local: bool = False,
     all: bool = True,
@@ -167,9 +167,9 @@

     if all or local:
         try:
-            catalog.edit_dataset(name, new_name, description,
+            catalog.edit_dataset(name, new_name, description, attrs)
         except DatasetNotFoundError:
             print("Dataset not found in local", file=sys.stderr)

     if (all or studio) and token:
-        edit_studio_dataset(team, name, new_name, description,
+        edit_studio_dataset(team, name, new_name, description, attrs)
datachain/cli/commands/show.py
CHANGED
@@ -42,8 +42,8 @@ def show(
     print("Name: ", name)
     if dataset.description:
         print("Description: ", dataset.description)
-    if dataset.
-        print("
+    if dataset.attrs:
+        print("Attributes: ", ",".join(dataset.attrs))
     print("\n")

     show_records(records, collapse_columns=not no_collapse, hidden_fields=hidden_fields)
datachain/cli/parser/__init__.py
CHANGED
@@ -217,9 +217,9 @@ def get_parser() -> ArgumentParser:  # noqa: PLR0915
         help="Dataset description",
     )
     parse_edit_dataset.add_argument(
-        "--
+        "--attrs",
         nargs="+",
-        help="Dataset
+        help="Dataset attributes",
     )
     parse_edit_dataset.add_argument(
         "--studio",
datachain/data_storage/metastore.py
CHANGED
@@ -120,7 +120,7 @@ class AbstractMetastore(ABC, Serializable):
         schema: Optional[dict[str, Any]] = None,
         ignore_if_exists: bool = False,
         description: Optional[str] = None,
-
+        attrs: Optional[list[str]] = None,
     ) -> DatasetRecord:
         """Creates new dataset."""

@@ -326,7 +326,7 @@ class AbstractDBMetastore(AbstractMetastore):
             Column("id", Integer, primary_key=True),
             Column("name", Text, nullable=False),
             Column("description", Text),
-            Column("
+            Column("attrs", JSON, nullable=True),
             Column("status", Integer, nullable=False),
             Column("feature_schema", JSON, nullable=True),
             Column("created_at", DateTime(timezone=True)),
@@ -521,7 +521,7 @@ class AbstractDBMetastore(AbstractMetastore):
         schema: Optional[dict[str, Any]] = None,
         ignore_if_exists: bool = False,
         description: Optional[str] = None,
-
+        attrs: Optional[list[str]] = None,
         **kwargs,  # TODO registered = True / False
     ) -> DatasetRecord:
         """Creates new dataset."""
@@ -538,7 +538,7 @@ class AbstractDBMetastore(AbstractMetastore):
             query_script=query_script,
             schema=json.dumps(schema or {}),
             description=description,
-
+            attrs=json.dumps(attrs or []),
         )
         if ignore_if_exists and hasattr(query, "on_conflict_do_nothing"):
             # SQLite and PostgreSQL both support 'on_conflict_do_nothing',
@@ -621,7 +621,7 @@ class AbstractDBMetastore(AbstractMetastore):
         dataset_values = {}
         for field, value in kwargs.items():
             if field in self._dataset_fields[1:]:
-                if field in ["
+                if field in ["attrs", "schema"]:
                     values[field] = json.dumps(value) if value else None
                 else:
                     values[field] = value
datachain/dataset.py
CHANGED
@@ -329,7 +329,7 @@ class DatasetRecord:
     id: int
     name: str
     description: Optional[str]
-
+    attrs: list[str]
     schema: dict[str, Union[SQLType, type[SQLType]]]
     feature_schema: dict
     versions: list[DatasetVersion]
@@ -357,7 +357,7 @@ class DatasetRecord:
         id: int,
         name: str,
         description: Optional[str],
-
+        attrs: str,
         status: int,
         feature_schema: Optional[str],
         created_at: datetime,
@@ -387,7 +387,7 @@ class DatasetRecord:
         version_schema: str,
         version_job_id: Optional[str] = None,
     ) -> "DatasetRecord":
-
+        attrs_lst: list[str] = json.loads(attrs) if attrs else []
         schema_dct: dict[str, Any] = json.loads(schema) if schema else {}
         version_schema_dct: dict[str, str] = (
             json.loads(version_schema) if version_schema else {}
@@ -418,7 +418,7 @@ class DatasetRecord:
             id,
             name,
             description,
-
+            attrs_lst,
             cls.parse_schema(schema_dct),  # type: ignore[arg-type]
             json.loads(feature_schema) if feature_schema else {},
             [dataset_version],
@@ -562,7 +562,7 @@ class DatasetListRecord:
     id: int
     name: str
     description: Optional[str]
-
+    attrs: list[str]
     versions: list[DatasetListVersion]
     created_at: Optional[datetime] = None

@@ -572,7 +572,7 @@ class DatasetListRecord:
         id: int,
         name: str,
         description: Optional[str],
-
+        attrs: str,
         created_at: datetime,
         version_id: int,
         version_uuid: str,
@@ -588,7 +588,7 @@ class DatasetListRecord:
         version_query_script: Optional[str],
         version_job_id: Optional[str] = None,
     ) -> "DatasetListRecord":
-
+        attrs_lst: list[str] = json.loads(attrs) if attrs else []

         dataset_version = DatasetListVersion.parse(
             version_id,
@@ -610,7 +610,7 @@ class DatasetListRecord:
             id,
             name,
             description,
-
+            attrs_lst,
             [dataset_version],
             created_at,
         )
datachain/lib/convert/values_to_tuples.py
CHANGED
@@ -1,5 +1,6 @@
+import itertools
 from collections.abc import Sequence
-from typing import Any, Union
+from typing import Any, Optional, Union

 from datachain.lib.data_model import (
     DataType,
@@ -66,21 +67,29 @@ def values_to_tuples(  # noqa: C901, PLR0912
                 f"signal '{k}' is not present in the output",
             )
         else:
-
-
-
-
-
-
-            raise ValuesToTupleError(
-                ds_name,
-                f"signal '{k}' has unsupported type '{typ.__name__}'."
-                f" Please use DataModel types: {DataTypeNames}",
+            # FIXME: Stops as soon as it finds the first non-None value.
+            # If a non-None value appears early, it won't check the remaining items for
+            # `None` values.
+            try:
+                pos, first_not_none_element = next(
+                    itertools.dropwhile(lambda pair: pair[1] is None, enumerate(v))
                 )
-
-
+            except StopIteration:
+                typ = str  # default to str if all values are None or has length 0
+                nullable = True
             else:
-
+                nullable = pos > 0
+                typ = type(first_not_none_element)  # type: ignore[assignment]
+                if not is_chain_type(typ):
+                    raise ValuesToTupleError(
+                        ds_name,
+                        f"signal '{k}' has unsupported type '{typ.__name__}'."
+                        f" Please use DataModel types: {DataTypeNames}",
+                    )
+                if isinstance(first_not_none_element, list):
+                    typ = list[type(first_not_none_element[0])]  # type: ignore[assignment, misc]
+
+            types_map[k] = Optional[typ] if nullable else typ  # type: ignore[assignment]

         if length < 0:
             length = len_
datachain/lib/dataset_info.py
CHANGED
@@ -32,11 +32,28 @@ class DatasetInfo(DataModel):
     metrics: dict[str, Any] = Field(default={})
     error_message: str = Field(default="")
     error_stack: str = Field(default="")
+    attrs: list[str] = Field(default=[])

     @property
     def is_temp(self) -> bool:
         return Session.is_temp_dataset(self.name)

+    def has_attr(self, attr: str) -> bool:
+        s = attr.split("=")
+        if len(s) == 1:
+            return attr in self.attrs
+
+        name = s[0]
+        value = s[1]
+        for a in self.attrs:
+            s = a.split("=")
+            if value == "*" and s[0] == name:
+                return True
+            if len(s) == 2 and s[0] == name and s[1] == value:
+                return True
+
+        return False
+
     @staticmethod
     def _validate_dict(
         v: Optional[Union[str, dict]],
@@ -83,4 +100,5 @@ class DatasetInfo(DataModel):
             metrics=job.metrics if job else {},
             error_message=version.error_message,
             error_stack=version.error_stack,
+            attrs=dataset.attrs,
         )
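The matching rules behind `has_attr` are worth spelling out; the following standalone sketch re-implements them for illustration only (it is not the library code itself):

```python
def matches(dataset_attrs: list[str], query: str) -> bool:
    name, sep, value = query.partition("=")
    if not sep:                      # plain attribute, e.g. "NLP"
        return query in dataset_attrs
    for a in dataset_attrs:
        a_name, a_sep, a_value = a.partition("=")
        if a_name != name:
            continue
        if value == "*" or (a_sep and a_value == value):
            return True              # "location=*" matches any value
    return False

assert matches(["NLP", "location=US"], "NLP")
assert matches(["NLP", "location=US"], "location=US")
assert matches(["NLP", "location=US"], "location=*")
assert not matches(["NLP", "location=US"], "location=EU")
```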
datachain/lib/dc/__init__.py
CHANGED
@@ -1,6 +1,7 @@
 from .csv import read_csv
+from .database import read_database
 from .datachain import C, Column, DataChain
-from .datasets import datasets, read_dataset
+from .datasets import datasets, delete_dataset, read_dataset
 from .hf import read_hf
 from .json import read_json
 from .listings import listings
@@ -19,8 +20,10 @@ __all__ = [
     "DatasetPrepareError",
     "Sys",
     "datasets",
+    "delete_dataset",
     "listings",
     "read_csv",
+    "read_database",
     "read_dataset",
     "read_hf",
     "read_json",
datachain/lib/dc/database.py
ADDED
@@ -0,0 +1,151 @@
+import contextlib
+import itertools
+import os
+import sqlite3
+from typing import TYPE_CHECKING, Any, Optional, Union
+
+import sqlalchemy
+
+if TYPE_CHECKING:
+    from collections.abc import Iterator, Mapping, Sequence
+
+    import sqlalchemy.orm  # noqa: TC004
+
+    from datachain.lib.data_model import DataType
+    from datachain.query import Session
+
+    from .datachain import DataChain
+
+ConnectionType = Union[
+    str,
+    sqlalchemy.engine.URL,
+    sqlalchemy.engine.interfaces.Connectable,
+    sqlalchemy.engine.Engine,
+    sqlalchemy.engine.Connection,
+    sqlalchemy.orm.Session,
+    sqlite3.Connection,
+]
+
+
+@contextlib.contextmanager
+def _connect(
+    connection: "ConnectionType",
+) -> "Iterator[Union[sqlalchemy.engine.Connection, sqlalchemy.orm.Session]]":
+    import sqlalchemy.orm
+
+    with contextlib.ExitStack() as stack:
+        engine_kwargs = {"echo": bool(os.environ.get("DEBUG_SHOW_SQL_QUERIES"))}
+        if isinstance(connection, (str, sqlalchemy.URL)):
+            engine = sqlalchemy.create_engine(connection, **engine_kwargs)
+            stack.callback(engine.dispose)
+            yield stack.enter_context(engine.connect())
+        elif isinstance(connection, sqlite3.Connection):
+            engine = sqlalchemy.create_engine(
+                "sqlite://", creator=lambda: connection, **engine_kwargs
+            )
+            # do not close the connection, as it is managed by the caller
+            yield engine.connect()
+        elif isinstance(connection, sqlalchemy.Engine):
+            yield stack.enter_context(connection.connect())
+        elif isinstance(connection, (sqlalchemy.Connection, sqlalchemy.orm.Session)):
+            # do not close the connection, as it is managed by the caller
+            yield connection
+        else:
+            raise TypeError(f"Unsupported connection type: {type(connection).__name__}")
+
+
+def _infer_schema(
+    result: "sqlalchemy.engine.Result",
+    to_infer: list[str],
+    infer_schema_length: Optional[int] = 100,
+) -> tuple[list["sqlalchemy.Row"], dict[str, "DataType"]]:
+    from datachain.lib.convert.values_to_tuples import values_to_tuples
+
+    if not to_infer:
+        return [], {}
+
+    rows = list(itertools.islice(result, infer_schema_length))
+    values = {col: [row._mapping[col] for row in rows] for col in to_infer}
+    _, output_schema, _ = values_to_tuples("", **values)
+    return rows, output_schema
+
+
+def read_database(
+    query: Union[str, "sqlalchemy.sql.expression.Executable"],
+    connection: "ConnectionType",
+    params: Union["Sequence[Mapping[str, Any]]", "Mapping[str, Any]", None] = None,
+    *,
+    output: Optional["dict[str, DataType]"] = None,
+    session: Optional["Session"] = None,
+    settings: Optional[dict] = None,
+    in_memory: bool = False,
+    infer_schema_length: Optional[int] = 100,
+) -> "DataChain":
+    """
+    Read the results of a SQL query into a DataChain, using a given database connection.
+
+    Args:
+        query:
+            The SQL query to execute. Can be a raw SQL string or a SQLAlchemy
+            `Executable` object.
+        connection: SQLAlchemy connectable, str, or a sqlite3 connection
+            Using SQLAlchemy makes it possible to use any DB supported by that
+            library. If a DBAPI2 object, only sqlite3 is supported. The user is
+            responsible for engine disposal and connection closure for the
+            SQLAlchemy connectable; str connections are closed automatically.
+        params: Parameters to pass to execute method.
+        output: A dictionary mapping column names to types, used to override the
+            schema inferred from the query results.
+        session: Session to use for the chain.
+        settings: Settings to use for the chain.
+        in_memory: If True, creates an in-memory session. Defaults to False.
+        infer_schema_length:
+            The maximum number of rows to scan for inferring schema.
+            If set to `None`, the full data may be scanned.
+            The rows used for schema inference are stored in memory,
+            so large values can lead to high memory usage.
+            Only applies if the `output` parameter is not set for the given column.
+
+    Examples:
+        Reading from a SQL query against a user-supplied connection:
+        ```python
+        query = "SELECT key, value FROM tbl"
+        chain = dc.read_database(query, connection, output={"value": float})
+        ```
+
+        Load data from a SQLAlchemy driver/engine:
+        ```python
+        from sqlalchemy import create_engine
+        engine = create_engine("postgresql+psycopg://myuser:mypassword@localhost:5432/mydb")
+        chain = dc.read_database("select * from tbl", engine)
+        ```
+
+        Load data from a parameterized SQLAlchemy query:
+        ```python
+        query = "SELECT key, value FROM tbl WHERE value > :value"
+        dc.read_database(query, engine, params={"value": 50})
+        ```
+
+    Notes:
+        This function works with a variety of databases — including, but not limited to,
+        SQLite, DuckDB, PostgreSQL, and Snowflake, provided the appropriate driver is
+        installed.
+    """
+    from datachain.lib.dc.records import read_records
+
+    output = output or {}
+    if isinstance(query, str):
+        query = sqlalchemy.text(query)
+    kw = {"execution_options": {"stream_results": True}}  # use server-side cursors
+    with _connect(connection) as conn, conn.execute(query, params, **kw) as result:
+        cols = result.keys()
+        to_infer = [k for k in cols if k not in output]  # preserve the order
+        rows, inferred_schema = _infer_schema(result, to_infer, infer_schema_length)
+        records = (row._asdict() for row in itertools.chain(rows, result))
+        return read_records(
+            records,
+            session=session,
+            settings=settings,
+            in_memory=in_memory,
+            schema=inferred_schema | output,
+        )
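Beyond the docstring examples, a fully self-contained sketch against an in-memory SQLite database (hypothetical table and column names) could look like this:

```python
import sqlite3

import datachain as dc

# Stdlib sqlite3 connection; read_database leaves closing it to the caller.
conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE tbl (key TEXT, value REAL)")
conn.executemany("INSERT INTO tbl VALUES (?, ?)", [("a", 1.5), ("b", 2.5)])

# "value" is pinned via `output`; the remaining columns are inferred from the rows.
chain = dc.read_database(
    "SELECT key, value FROM tbl",
    conn,
    output={"value": float},
    in_memory=True,
)
chain.show()
conn.close()
```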
datachain/lib/dc/datachain.py
CHANGED
@@ -133,7 +133,7 @@ class DataChain:
                 .choices[0]
                 .message.content,
             )
-            .
+            .persist()
         )

         try:
@@ -443,22 +443,33 @@
         )
         return listings(*args, **kwargs)

+    def persist(self) -> "Self":
+        """Saves temporary chain that will be removed after the process ends.
+        Temporary datasets are useful for optimization, for example when we have
+        multiple chains starting with identical sub-chain. We can then persist that
+        common chain and use it to calculate other chains, to avoid re-calculation
+        every time.
+        It returns the chain itself.
+        """
+        schema = self.signals_schema.clone_without_sys_signals().serialize()
+        return self._evolve(query=self._query.save(feature_schema=schema))
+
     def save(  # type: ignore[override]
         self,
-        name:
+        name: str,
         version: Optional[int] = None,
         description: Optional[str] = None,
-
+        attrs: Optional[list[str]] = None,
         **kwargs,
     ) -> "Self":
         """Save to a Dataset. It returns the chain itself.

         Parameters:
-            name : dataset name.
-                removed after process ends. Temp dataset are useful for optimization.
+            name : dataset name.
             version : version of a dataset. Default - the last version that exist.
             description : description of a dataset.
-
+            attrs : attributes of a dataset. They can be without value, e.g "NLP",
+                or with a value, e.g "location=US".
         """
         schema = self.signals_schema.clone_without_sys_signals().serialize()
         return self._evolve(
@@ -466,7 +477,7 @@
             name=name,
             version=version,
             description=description,
-
+            attrs=attrs,
             feature_schema=schema,
             **kwargs,
         )
@@ -1112,7 +1123,7 @@
         if self._query.attached:
             chain = self
         else:
-            chain = self.
+            chain = self.persist()
         assert chain.name is not None  # for mypy
         return PytorchDataset(
             chain.name,
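A short sketch of how `persist()` and the new `attrs` argument to `save()` might be combined (hypothetical signal and dataset names):

```python
import datachain as dc

base = dc.read_values(num=list(range(10)))

# Persist the shared prefix once as a temporary dataset (removed when the
# process ends), then branch from it without re-computing it per derived chain.
base = base.persist()
small = base.filter(dc.C("num") < 5)
large = base.filter(dc.C("num") >= 5)

# attrs can be plain ("demo") or key=value ("location=US").
large.save("large_numbers", attrs=["demo", "location=US"])
```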
datachain/lib/dc/datasets.py
CHANGED
@@ -102,6 +102,7 @@ def datasets(
     column: Optional[str] = None,
     include_listing: bool = False,
     studio: bool = False,
+    attrs: Optional[list[str]] = None,
 ) -> "DataChain":
     """Generate chain with list of registered datasets.

@@ -114,6 +115,10 @@ def datasets(
         include_listing: If True, includes listing datasets. Defaults to False.
         studio: If True, returns datasets from Studio only,
             otherwise returns all local datasets. Defaults to False.
+        attrs: Optional list of attributes to filter datasets on. It can be just
+            attribute without value e.g "NLP", or attribute with value
+            e.g "location=US". Attribute with value can also accept "*" to target
+            all that have specific name e.g "location=*"

     Returns:
         DataChain: A new DataChain instance containing dataset information.
@@ -139,6 +144,10 @@
     ]
     datasets_values = [d for d in datasets_values if not d.is_temp]

+    if attrs:
+        for attr in attrs:
+            datasets_values = [d for d in datasets_values if d.has_attr(attr)]
+
     if not column:
         # flattening dataset fields
         schema = {
@@ -166,3 +175,46 @@
         output={column: DatasetInfo},
         **{column: datasets_values},  # type: ignore[arg-type]
     )
+
+
+def delete_dataset(
+    name: str,
+    version: Optional[int] = None,
+    force: Optional[bool] = False,
+    studio: Optional[bool] = False,
+    session: Optional[Session] = None,
+    in_memory: bool = False,
+) -> None:
+    """Removes specific dataset version or all dataset versions, depending on
+    a force flag.
+
+    Args:
+        name : Dataset name
+        version : Optional dataset version
+        force: If true, all datasets versions will be removed. Defaults to False.
+        studio: If True, removes dataset from Studio only,
+            otherwise remove from local. Defaults to False.
+        session: Optional session instance. If not provided, uses default session.
+        in_memory: If True, creates an in-memory session. Defaults to False.
+
+    Returns: None
+
+    Example:
+        ```py
+        import datachain as dc
+        dc.delete_dataset("cats")
+        ```
+
+        ```py
+        import datachain as dc
+        dc.delete_dataset("cats", version=1)
+        ```
+    """
+
+    session = Session.get(session, in_memory=in_memory)
+    catalog = session.catalog
+    if not force:
+        version = version or catalog.get_dataset(name).latest_version
+    else:
+        version = None
+    catalog.remove_dataset(name, version=version, force=force, studio=studio)
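Filtering the dataset listing by attribute then looks roughly like this (hypothetical attribute values):

```python
import datachain as dc

# Only datasets carrying both attributes.
nlp_us = dc.datasets(attrs=["NLP", "location=US"])

# "*" matches any value for the given attribute name.
located = dc.datasets(attrs=["location=*"])
located.show()
```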
datachain/lib/dc/pandas.py
CHANGED
@@ -37,7 +37,14 @@ def read_pandas(  # type: ignore[override]
     """
     from .utils import DatasetPrepareError

-
+    def get_col_name(col):
+        if isinstance(col, tuple):
+            # Join tuple elements with underscore for MultiIndex columns
+            return "_".join(map(str, col)).lower()
+        # Handle regular string column names
+        return str(col).lower()
+
+    fr_map = {get_col_name(col): df[col].tolist() for col in df.columns}

     for c in fr_map:
         if not c.isidentifier():
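The effect of the new column-name handling is easiest to see with a MultiIndex frame; a small sketch (hypothetical column labels):

```python
import pandas as pd

import datachain as dc

# MultiIndex columns ("metrics", "f1") flatten to the signal name "metrics_f1".
df = pd.DataFrame({("metrics", "f1"): [0.9, 0.8], ("metrics", "recall"): [0.7, 0.6]})
chain = dc.read_pandas(df, in_memory=True)
print(chain.to_pandas().columns)  # expect metrics_f1 and metrics_recall
```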
datachain/lib/dc/records.py
CHANGED
@@ -1,8 +1,5 @@
-from
-
-    Optional,
-    Union,
-)
+from collections.abc import Iterable
+from typing import TYPE_CHECKING, Optional, Union

 import sqlalchemy

@@ -12,6 +9,7 @@ from datachain.lib.file import (
 )
 from datachain.lib.signal_schema import SignalSchema
 from datachain.query import Session
+from datachain.query.schema import Column

 if TYPE_CHECKING:
     from typing_extensions import ParamSpec
@@ -22,7 +20,7 @@


 def read_records(
-    to_insert: Optional[Union[dict,
+    to_insert: Optional[Union[dict, Iterable[dict]]],
     session: Optional[Session] = None,
     settings: Optional[dict] = None,
     in_memory: bool = False,
@@ -54,10 +52,11 @@ def read_records(

     if schema:
         signal_schema = SignalSchema(schema)
-        columns = [
-
-
-
+        columns = []
+        for c in signal_schema.db_signals(as_columns=True):
+            assert isinstance(c, Column)
+            kw = {"nullable": c.nullable} if c.nullable is not None else {}
+            columns.append(sqlalchemy.Column(c.name, c.type, **kw))
     else:
         columns = [
             sqlalchemy.Column(name, typ)
@@ -83,8 +82,7 @@ def read_records(

     warehouse = catalog.warehouse
     dr = warehouse.dataset_rows(dsr)
-
-
-
-        db.execute(insert_q.values(**record))
+    table = dr.get_table()
+    warehouse.insert_rows(table, to_insert)
+    warehouse.insert_rows_done(table)
     return read_dataset(name=dsr.name, session=session, settings=settings)
datachain/lib/signal_schema.py
CHANGED
@@ -581,7 +581,11 @@
         signals = [
             DEFAULT_DELIMITER.join(path)
             if not as_columns
-            else Column(
+            else Column(
+                DEFAULT_DELIMITER.join(path),
+                python_to_sql(_type),
+                nullable=is_optional(_type),
+            )
             for path, _type, has_subtree, _ in self.get_flat_tree(
                 include_hidden=include_hidden
             )
@@ -990,3 +994,8 @@
         }

         return SignalSchema.deserialize(schema)
+
+
+def is_optional(type_: Any) -> bool:
+    """Check if a type is Optional."""
+    return get_origin(type_) is Union and type(None) in get_args(type_)
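The new module-level helper is a thin check on typing unions; for example:

```python
from typing import Optional, Union

from datachain.lib.signal_schema import is_optional

assert is_optional(Optional[int])        # Union[int, None]
assert is_optional(Union[str, None])
assert not is_optional(int)              # not a Union at all
assert not is_optional(Union[int, str])  # a Union without None
```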
datachain/lib/udf.py
CHANGED
@@ -474,8 +474,9 @@ class Generator(UDFBase):
             remove_prefetched=bool(self.prefetch) and not cache,
         )
         with closing(prepared_inputs):
-            for row in
+            for row in prepared_inputs:
                 yield _process_row(row)
+                processed_cb.relative_update(1)

         self.teardown()

datachain/query/dataset.py
CHANGED
@@ -437,9 +437,17 @@ class UDFStep(Step, ABC):
                 "distributed processing."
             )

-            from datachain.catalog.loader import
+            from datachain.catalog.loader import (
+                DISTRIBUTED_IMPORT_PATH,
+                get_udf_distributor_class,
+            )
+
+            if not (udf_distributor_class := get_udf_distributor_class()):
+                raise RuntimeError(
+                    f"{DISTRIBUTED_IMPORT_PATH} import path is required "
+                    "for distributed UDF processing."
+                )

-            udf_distributor_class = get_udf_distributor_class()
             udf_distributor = udf_distributor_class(
                 catalog=catalog,
                 table=udf_table,
@@ -1162,16 +1170,6 @@ class DatasetQuery:
         )
         return sqlalchemy.table(table_name)

-    @staticmethod
-    def delete(
-        name: str, version: Optional[int] = None, catalog: Optional["Catalog"] = None
-    ) -> None:
-        from datachain.catalog import get_catalog
-
-        catalog = catalog or get_catalog()
-        version = version or catalog.get_dataset(name).latest_version
-        catalog.remove_dataset(name, version)
-
     @property
     def attached(self) -> bool:
         """
@@ -1682,7 +1680,7 @@
         version: Optional[int] = None,
         feature_schema: Optional[dict] = None,
         description: Optional[str] = None,
-
+        attrs: Optional[list[str]] = None,
         **kwargs,
     ) -> "Self":
         """Save the query as a dataset."""
@@ -1716,7 +1714,7 @@
             feature_schema=feature_schema,
             columns=columns,
             description=description,
-
+            attrs=attrs,
             **kwargs,
         )
         version = version or dataset.latest_version
datachain/query/dispatch.py
CHANGED
@@ -13,7 +13,7 @@ from multiprocess import get_context

 from datachain.catalog import Catalog
 from datachain.catalog.catalog import clone_catalog_with_cache
-from datachain.catalog.loader import get_udf_distributor_class
+from datachain.catalog.loader import DISTRIBUTED_IMPORT_PATH, get_udf_distributor_class
 from datachain.lib.udf import _get_cache
 from datachain.query.batch import RowsOutput, RowsOutputBatch
 from datachain.query.dataset import (
@@ -91,7 +91,12 @@ def udf_entrypoint() -> int:


 def udf_worker_entrypoint() -> int:
-
+    if not (udf_distributor_class := get_udf_distributor_class()):
+        raise RuntimeError(
+            f"{DISTRIBUTED_IMPORT_PATH} import path is required "
+            "for distributed UDF processing."
+        )
+    return udf_distributor_class.run_worker()


 class UDFDispatcher:
datachain/query/schema.py
CHANGED
@@ -40,12 +40,15 @@ class ColumnMeta(type):
 class Column(sa.ColumnClause, metaclass=ColumnMeta):
     inherit_cache: Optional[bool] = True

-    def __init__(
+    def __init__(
+        self, text, type_=None, is_literal=False, nullable=None, _selectable=None
+    ):
         """Dataset column."""
         self.name = ColumnMeta.to_db_name(text)
         super().__init__(
             self.name, type_=type_, is_literal=is_literal, _selectable=_selectable
         )
+        self.nullable = nullable

     def __getattr__(self, name: str):
         return Column(self.name + DEFAULT_DELIMITER + name)
datachain/remote/studio.py
CHANGED
@@ -290,13 +290,13 @@ class StudioClient:
         name: str,
         new_name: Optional[str] = None,
         description: Optional[str] = None,
-
+        attrs: Optional[list[str]] = None,
     ) -> Response[DatasetInfoData]:
         body = {
             "new_name": new_name,
             "dataset_name": name,
             "description": description,
-            "
+            "attrs": attrs,
         }

         return self._send_request(
datachain/studio.py
CHANGED
@@ -187,10 +187,10 @@ def edit_studio_dataset(
     name: str,
     new_name: Optional[str] = None,
     description: Optional[str] = None,
-
+    attrs: Optional[list[str]] = None,
 ):
     client = StudioClient(team=team_name)
-    response = client.edit_dataset(name, new_name, description,
+    response = client.edit_dataset(name, new_name, description, attrs)
     if not response.ok:
         raise DataChainError(response.message)

{datachain-0.14.5.dist-info → datachain-0.16.0.dist-info}/RECORD
CHANGED
@@ -1,9 +1,9 @@
-datachain/__init__.py,sha256=
+datachain/__init__.py,sha256=Dx_Dw6AuvC_CZtXxfRv0Z-ND6ieC4Cz-tZkMW-Rvmz4,1496
 datachain/__main__.py,sha256=hG3Y4ARGEqe1AWwNMd259rBlqtphx1Wk39YbueQ0yV8,91
 datachain/asyn.py,sha256=RH_jFwJcTXxhEFomaI9yL6S3Onau6NZ6FSKfKFGtrJE,9689
 datachain/cache.py,sha256=yQblPhOh_Mq74Ma7xT1CL1idLJ0HgrQxpGVYvRy_9Eg,3623
 datachain/config.py,sha256=g8qbNV0vW2VEKpX-dGZ9pAn0DAz6G2ZFcr7SAV3PoSM,4272
-datachain/dataset.py,sha256=
+datachain/dataset.py,sha256=msBC62M_HAv3hT4tKFEGOlH3sMCMg5DVd5lhmqkDGB4,19379
 datachain/error.py,sha256=bxAAL32lSeMgzsQDEHbGTGORj-mPzzpCRvWDPueJNN4,1092
 datachain/job.py,sha256=x5PB6d5sqx00hePNNkirESlOVAvnmkEM5ygUgQmAhsk,1262
 datachain/listing.py,sha256=kNSCFYWo2iM1wWg1trwq4WpYZxYqz4RKxkTtsppEzAw,7079
@@ -13,24 +13,24 @@ datachain/nodes_thread_pool.py,sha256=mdo0s-VybuSZkRUARcUO4Tjh8KFfZr9foHqmupx2Sm
 datachain/progress.py,sha256=lRzxoYP4Qv2XBwD78sOkmYRzHFpZ2ExVNJF8wAeICtY,770
 datachain/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datachain/script_meta.py,sha256=V-LaFOZG84pD0Zc0NvejYdzwDgzITv6yHvAHggDCnuY,4978
-datachain/studio.py,sha256=
+datachain/studio.py,sha256=CwXrZ3PXJFIoilelIHblDV05kzcWj9vbV3KanMPVrRQ,10015
 datachain/telemetry.py,sha256=0A4IOPPp9VlP5pyW9eBfaTK3YhHGzHl7dQudQjUAx9A,994
 datachain/utils.py,sha256=8Qz8lRrX0bUTGvwYd-OR-l6ElVRsQBdBO5QMvwt56T4,15190
 datachain/catalog/__init__.py,sha256=cMZzSz3VoUi-6qXSVaHYN-agxQuAcz2XSqnEPZ55crE,353
-datachain/catalog/catalog.py,sha256=
+datachain/catalog/catalog.py,sha256=drCemStFXk2MZgexbUsSIBJuUvn0YwL1tJO69KrWeeg,61004
 datachain/catalog/datasource.py,sha256=IkGMh0Ttg6Q-9DWfU_H05WUnZepbGa28HYleECi6K7I,1353
-datachain/catalog/loader.py,sha256=
-datachain/cli/__init__.py,sha256=
+datachain/catalog/loader.py,sha256=UXjYD6BNRoupPvkiz3-b04jepXhtLHCA4gzKFnXxOtQ,5987
+datachain/cli/__init__.py,sha256=i40xHzVZP3iZFBw3UixQ2OU-s_GQq6OyvQ-_6opwIYc,8333
 datachain/cli/utils.py,sha256=wrLnAh7Wx8O_ojZE8AE4Lxn5WoxHbOj7as8NWlLAA74,3036
 datachain/cli/commands/__init__.py,sha256=zp3bYIioO60x_X04A4-IpZqSYVnpwOa1AdERQaRlIhI,493
-datachain/cli/commands/datasets.py,sha256=
+datachain/cli/commands/datasets.py,sha256=sQ83zxHLuP04cXqBYD3iVcsr49LHA3lnjYxdL142HMk,5793
 datachain/cli/commands/du.py,sha256=9edEzDEs98K2VYk8Wf-ZMpUzALcgm9uD6YtoqbvtUGU,391
 datachain/cli/commands/index.py,sha256=eglNaIe1yyIadUHHumjtNbgIjht6kme7SS7xE3YHR88,198
 datachain/cli/commands/ls.py,sha256=dSD2_MHng4t9HRFJZWMOCjPL4XU3qaBV3piNl8UXP08,5275
 datachain/cli/commands/misc.py,sha256=c0DmkOLwcDI2YhA8ArOuLJk6aGzSMZCiKL_E2JGibVE,600
 datachain/cli/commands/query.py,sha256=2S7hQxialt1fkbocxi6JXZI6jS5QnFrD1aOjKgZkzfI,1471
-datachain/cli/commands/show.py,sha256=
-datachain/cli/parser/__init__.py,sha256=
+datachain/cli/commands/show.py,sha256=K__cCLDJLTRt-sBTMxDID0A_4dFgRRMvjDrrVWcbMUQ,1606
+datachain/cli/parser/__init__.py,sha256=SKB94ZS9kRHV7UOrQcIXsSQ7BOFlp4U2To4wseXXcaI,15724
 datachain/cli/parser/job.py,sha256=kvQkSfieyUmvJpOK8p78UgS8sygHhQXztRlOtVcgtaU,3449
 datachain/cli/parser/studio.py,sha256=Y-1OlQGecLVi9QofvWUfSlPd2ISyaESf7QFGZqGsrdw,3609
 datachain/cli/parser/utils.py,sha256=rETdD-9Hq9A4OolgfT7jQw4aoawtbfmkdtH6E7nkhpI,2888
@@ -45,7 +45,7 @@ datachain/client/s3.py,sha256=YCtDhKVO_jGsMPeyqe3xk5QsF5lqMabqkt0tPFWUHOM,7286
 datachain/data_storage/__init__.py,sha256=9Wit-oe5P46V7CJQTD0BJ5MhOa2Y9h3ddJ4VWTe-Lec,273
 datachain/data_storage/db_engine.py,sha256=n8ojCbvVMPY2e3SG8fUaaD0b9GkVfpl_Naa_6EiHfWg,3788
 datachain/data_storage/job.py,sha256=w-7spowjkOa1P5fUVtJou3OltT0L48P0RYWZ9rSJ9-s,383
-datachain/data_storage/metastore.py,sha256=
+datachain/data_storage/metastore.py,sha256=bhfAaijM7p_D5ltMWg-CVEv9lTflL3bGUWqAmJ8qFbc,37774
 datachain/data_storage/schema.py,sha256=asZYz1cg_WKfe2Q-k5W51E2z2CzHU5B4QEDZDMFr8yo,9346
 datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2krunWUum5o,927
 datachain/data_storage/sqlite.py,sha256=f4tvq0gzYQP7aYGnfL3j4IBUNvctpBxI_ioFU-B1LFc,24540
@@ -69,7 +69,7 @@ datachain/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datachain/lib/arrow.py,sha256=9UBCF-lftQaz0yxdsjbLKbyzVSmrF_QSWdhp2oBDPqs,9486
 datachain/lib/clip.py,sha256=lm5CzVi4Cj1jVLEKvERKArb-egb9j1Ls-fwTItT6vlI,6150
 datachain/lib/data_model.py,sha256=ZwBXELtqROEdLL4DmxTipnwUZmhQvMz_UVDzyf7nQ9Y,2899
-datachain/lib/dataset_info.py,sha256=
+datachain/lib/dataset_info.py,sha256=Mmo3r_MWRb-47H4QueSaUqgeENJiJZmjkTYBMpRuKM8,3128
 datachain/lib/file.py,sha256=HLQXS_WULm7Y-fkHMy0WpibVAcrkLPRS6CrZy6rwFe0,30450
 datachain/lib/hf.py,sha256=gjxuStZBlKtNk3-4yYSlWZDv9zBGblOdvEy_Lwap5hA,5882
 datachain/lib/image.py,sha256=butvUY_33PVEYPKX2nVCPeJjJVcBaptZwsE9REQsTS8,3247
@@ -79,10 +79,10 @@ datachain/lib/meta_formats.py,sha256=Epydbdch1g4CojK8wd_ePzmwmljC4fVWlJtZ16jsX-A
 datachain/lib/model_store.py,sha256=DNIv8Y6Jtk1_idNLzIpsThOsdW2BMAudyUCbPUcgcxk,2515
 datachain/lib/pytorch.py,sha256=YS6yR13iVlrAXo5wzJswFFUHwWOql9KTdWIa86DXB-k,7712
 datachain/lib/settings.py,sha256=ZELRCTLbi5vzRPiDX6cQ9LLg9TefJ_A05gIGni0lll8,2535
-datachain/lib/signal_schema.py,sha256=
+datachain/lib/signal_schema.py,sha256=rt5DpL6DptQEZ8NYe2x_v1C_QFO-lDVEUawxzSswKXw,36062
 datachain/lib/tar.py,sha256=3WIzao6yD5fbLqXLTt9GhPGNonbFIs_fDRu-9vgLgsA,1038
 datachain/lib/text.py,sha256=UNHm8fhidk7wdrWqacEWaA6I9ykfYqarQ2URby7jc7M,1261
-datachain/lib/udf.py,sha256=
+datachain/lib/udf.py,sha256=zCdO5__gLMCgrdHmOvIa0eoWKCDAU1uO-MMAu_EU13o,16228
 datachain/lib/udf_signature.py,sha256=2EtsOPDNSPqcOlYwqbCdy6RF5MldI-7smii8aLy8p7Y,7543
 datachain/lib/utils.py,sha256=QrjVs_oLRXEotOPUYurBJypBFi_ReTJmxcnJeH4j2Uk,1596
 datachain/lib/video.py,sha256=suH_8Mi8VYk4-IVb1vjSduF_njs64ji1WGKHxDLnGYw,6629
@@ -93,17 +93,18 @@ datachain/lib/convert/flatten.py,sha256=IZFiUYbgXSxXhPSG5Cqf5IjnJ4ZDZKXMr4o_yCR1
 datachain/lib/convert/python_to_sql.py,sha256=wg-O5FRKX3x3Wh8ZL1b9ntMlgf1zRO4djMP3t8CHJLo,3188
 datachain/lib/convert/sql_to_python.py,sha256=XXCBYDQFUXJIBNWkjEP944cnCfJ8GF2Tji0DLF3A_zQ,315
 datachain/lib/convert/unflatten.py,sha256=ysMkstwJzPMWUlnxn-Z-tXJR3wmhjHeSN_P-sDcLS6s,2010
-datachain/lib/convert/values_to_tuples.py,sha256=
-datachain/lib/dc/__init__.py,sha256=
+datachain/lib/convert/values_to_tuples.py,sha256=CJ7x91ZYrRMc1lr-BR5AYi7EkWHbzPu1bVqCiP6jLoY,4491
+datachain/lib/dc/__init__.py,sha256=HD0NYrdy44u6kkpvgGjJcvGz-UGTHui2azghcT8ZUg0,838
 datachain/lib/dc/csv.py,sha256=asWPAxhMgIoLAdD2dObDlnGL8CTSD3TAuFuM4ci89bQ,4374
-datachain/lib/dc/
-datachain/lib/dc/
+datachain/lib/dc/database.py,sha256=gYKh1iO5hOWMPFTU1vZC5kOXkJzVse14TYTWE4_1iEA,5940
+datachain/lib/dc/datachain.py,sha256=aRTHaYMk2C1A3dslGpaaEmTvhwvbqnMNaWIBgdIWUX8,76847
+datachain/lib/dc/datasets.py,sha256=u6hlz0Eodh_s39TOW6kz0VIL3nGfadqu8FLoWqDxSJs,6890
 datachain/lib/dc/hf.py,sha256=PJl2wiLjdRsMz0SYbLT-6H8b-D5i2WjeH7li8HHOk_0,2145
 datachain/lib/dc/json.py,sha256=ZUThPDAaP2gBFIL5vsQTwKBcuN_dhvC_O44wdDv0jEc,2683
 datachain/lib/dc/listings.py,sha256=2na9v63xO1vPUNaoBSzA-TSN49V7zQAb-4iS1wOPLFE,1029
-datachain/lib/dc/pandas.py,sha256=
+datachain/lib/dc/pandas.py,sha256=ObueUXDUFKJGu380GmazdG02ARpKAHPhSaymfmOH13E,1489
 datachain/lib/dc/parquet.py,sha256=zYcSgrWwyEDW9UxGUSVdIVsCu15IGEf0xL8KfWQqK94,1782
-datachain/lib/dc/records.py,sha256=
+datachain/lib/dc/records.py,sha256=br5MTtD8mCrPpWXiyHXpYL-ChH9_tg0S-7ttAa8hH80,2634
 datachain/lib/dc/storage.py,sha256=QLf3-xMV2Gmy3AA8qF9WqAsb7R8Rk87l4s5hBoiCH98,5285
 datachain/lib/dc/utils.py,sha256=Ct-0FqCaDhNWHx09gJFcCXJGPjMI-VZr4t-GJyqTi44,3984
 datachain/lib/dc/values.py,sha256=cBQubhmPNEDMJldUXzGh-UKbdim4P6O2B91Gp39roKw,1389
@@ -118,17 +119,17 @@ datachain/model/ultralytics/pose.py,sha256=gXAWfAk4OWZl93hKcQPKZvqJa3nIrECB4RM8K
 datachain/model/ultralytics/segment.py,sha256=koq1HASo29isf0in6oSlzmU4IzsmOXe87F1ajQQVfh4,2911
 datachain/query/__init__.py,sha256=7DhEIjAA8uZJfejruAVMZVcGFmvUpffuZJwgRqNwe-c,263
 datachain/query/batch.py,sha256=6w8gzLTmLeylststu-gT5jIqEfi4-djS7_yTYyeo-fw,4190
-datachain/query/dataset.py,sha256=
-datachain/query/dispatch.py,sha256=
+datachain/query/dataset.py,sha256=0SKm8VaXYuzm06j53WK-vnB3-55jauJwq3QULPOooVU,58687
+datachain/query/dispatch.py,sha256=5p_jXxKJVCfIA4jLSQ0tAY1IhZUS3oJvyQXUH0Dk3bc,13215
 datachain/query/metrics.py,sha256=r5b0ygYhokbXp8Mg3kCH8iFSRw0jxzyeBe-C-J_bKFc,938
 datachain/query/params.py,sha256=O_j89mjYRLOwWNhYZl-z7mi-rkdP7WyFmaDufsdTryE,863
 datachain/query/queue.py,sha256=waqM_KzavU8C-G95-4211Nd4GXna_u2747Chgwtgz2w,3839
-datachain/query/schema.py,sha256=
+datachain/query/schema.py,sha256=fo_MdPXblMAtbB3kcZAQDzAUHWP2RfuPX2JWndeGGt8,6668
 datachain/query/session.py,sha256=wNdOHAi4HrsEihfzdcTlfB5i1xyj0dw6rlUz84StOoU,6512
 datachain/query/udf.py,sha256=ljAYaF-J77t7iS4zc1-g1ssYd4c6Q-ccKGEc3VQQmeM,1322
 datachain/query/utils.py,sha256=u0A_BwG9PNs0DxoDcvSWgWLpj3ByTUv8CqH13CIuGag,1293
 datachain/remote/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datachain/remote/studio.py,sha256=
+datachain/remote/studio.py,sha256=SCmsYURwqYTXfxQpizOoyxlPE2ECJv-sZWVitStRPgc,13107
 datachain/sql/__init__.py,sha256=6SQRdbljO3d2hx3EAVXEZrHQKv5jth0Jh98PogT59No,262
 datachain/sql/selectable.py,sha256=cTc60qVoAwqqss0Vop8Lt5Z-ROnM1XrQmL_GLjRxhXs,1765
 datachain/sql/types.py,sha256=ASSPkmM5EzdRindqj2O7WHLXq8VHAgFYedG8lYfGvVI,14045
@@ -150,9 +151,9 @@ datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR
 datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
 datachain/toolkit/split.py,sha256=ktGWzY4kyzjWyR86dhvzw-Zhl0lVk_LOX3NciTac6qo,2914
 datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
-datachain-0.
-datachain-0.
-datachain-0.
-datachain-0.
-datachain-0.
-datachain-0.
+datachain-0.16.0.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
+datachain-0.16.0.dist-info/METADATA,sha256=om4GIGxM-IQkuTWdISiHploZfvi4BmhAY8ywNdHtqYM,11328
+datachain-0.16.0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+datachain-0.16.0.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
+datachain-0.16.0.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
+datachain-0.16.0.dist-info/RECORD,,
{datachain-0.14.5.dist-info → datachain-0.16.0.dist-info}/WHEEL
File without changes
{datachain-0.14.5.dist-info → datachain-0.16.0.dist-info}/entry_points.txt
File without changes
{datachain-0.14.5.dist-info → datachain-0.16.0.dist-info}/licenses/LICENSE
File without changes
{datachain-0.14.5.dist-info → datachain-0.16.0.dist-info}/top_level.txt
File without changes