datachain 0.2.17__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff compares the contents of two publicly released package versions as they appear in their public registries. It is provided for informational purposes only.
- datachain/cache.py +5 -10
- datachain/catalog/catalog.py +9 -19
- datachain/client/azure.py +5 -12
- datachain/client/fsspec.py +5 -6
- datachain/client/gcs.py +4 -14
- datachain/client/local.py +2 -4
- datachain/client/s3.py +4 -8
- datachain/data_storage/schema.py +7 -15
- datachain/data_storage/warehouse.py +26 -31
- datachain/lib/convert/sql_to_python.py +13 -18
- datachain/lib/dc.py +31 -5
- datachain/lib/file.py +19 -18
- datachain/lib/webdataset.py +2 -3
- datachain/listing.py +14 -20
- datachain/node.py +32 -21
- datachain/query/builtins.py +5 -12
- datachain/query/dataset.py +2 -2
- datachain/query/schema.py +3 -7
- datachain/sql/functions/__init__.py +3 -2
- datachain/sql/functions/array.py +8 -0
- datachain/sql/sqlite/base.py +5 -0
- {datachain-0.2.17.dist-info → datachain-0.3.0.dist-info}/METADATA +1 -1
- {datachain-0.2.17.dist-info → datachain-0.3.0.dist-info}/RECORD +27 -27
- {datachain-0.2.17.dist-info → datachain-0.3.0.dist-info}/LICENSE +0 -0
- {datachain-0.2.17.dist-info → datachain-0.3.0.dist-info}/WHEEL +0 -0
- {datachain-0.2.17.dist-info → datachain-0.3.0.dist-info}/entry_points.txt +0 -0
- {datachain-0.2.17.dist-info → datachain-0.3.0.dist-info}/top_level.txt +0 -0
datachain/cache.py
CHANGED

@@ -24,8 +24,7 @@ sha256 = partial(hashlib.sha256, usedforsecurity=False)
 @attrs.frozen
 class UniqueId:
     storage: "StorageURI"
-    parent: str
-    name: str
+    path: str
     size: int
     etag: str
     version: str = ""
@@ -34,10 +33,6 @@ class UniqueId:
     location: Optional[str] = None
    last_modified: datetime = TIME_ZERO

-    @property
-    def path(self) -> str:
-        return f"{self.parent}/{self.name}" if self.parent else self.name
-
     def get_parsed_location(self) -> Optional[dict]:
         if not self.location:
             return None
@@ -53,10 +48,10 @@ class UniqueId:
         return loc_stack[0]

     def get_hash(self) -> str:
-        return sha256(
-            f"…
-        ).hexdigest()
+        fingerprint = f"{self.storage}/{self.path}/{self.version}/{self.etag}"
+        if self.location:
+            fingerprint += f"/{self.location}"
+        return sha256(fingerprint.encode()).hexdigest()


 def try_scandir(path):
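The hunk above collapses the parent/name pair into a single path field and rebuilds the cache fingerprint from it. A minimal standalone sketch of the new get_hash logic (the field values here are hypothetical):

    import hashlib
    from functools import partial

    sha256 = partial(hashlib.sha256, usedforsecurity=False)

    # storage/path/version/etag, plus /location when present
    fingerprint = "s3://bucket/animals/dogs/dog.jpg/v1/etag123"
    print(sha256(fingerprint.encode()).hexdigest())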
datachain/catalog/catalog.py
CHANGED

@@ -529,21 +529,16 @@ def find_column_to_str(  # noqa: PLR0911
     if column == "du":
         return str(
             src.listing.du(
-                {
-                    f: row[field_lookup[f]]
-                    for f in ["dir_type", "size", "parent", "name"]
-                }
+                {f: row[field_lookup[f]] for f in ["dir_type", "size", "path"]}
             )[0]
         )
     if column == "name":
-        return row[field_lookup["name"]] or ""
+        return posixpath.basename(row[field_lookup["path"]]) or ""
     if column == "owner":
         return row[field_lookup["owner_name"]] or ""
     if column == "path":
         is_dir = row[field_lookup["dir_type"]] == DirType.DIR
-        parent = row[field_lookup["parent"]]
-        name = row[field_lookup["name"]]
-        path = f"{parent}/{name}" if parent else name
+        path = row[field_lookup["path"]]
         if is_dir and path:
             full_path = path + "/"
         else:
@@ -724,8 +719,7 @@ class Catalog:
         columns = [
             Column("vtype", String),
             Column("dir_type", Int),
-            Column("parent", String),
-            Column("name", String),
+            Column("path", String),
             Column("etag", String),
             Column("version", String),
             Column("is_latest", Boolean),
@@ -1623,8 +1617,7 @@ class Catalog:
         Example output:
             {
                 "source": "s3://ldb-public",
-                "parent": "animals/dogs",
-                "name": "dog.jpg",
+                "path": "animals/dogs/dog.jpg",
                 ...
             }
         """
@@ -1675,8 +1668,7 @@ class Catalog:
     def _get_row_uid(self, row: RowDict) -> UniqueId:
         return UniqueId(
             row["source"],
-            row["parent"],
-            row["name"],
+            row["path"],
             row["size"],
             row["etag"],
             row["version"],
@@ -2308,16 +2300,14 @@ class Catalog:
         if column == "du":
             field_set.add("dir_type")
             field_set.add("size")
-            field_set.add("parent")
-            field_set.add("name")
+            field_set.add("path")
         elif column == "name":
-            field_set.add("name")
+            field_set.add("path")
         elif column == "owner":
             field_set.add("owner_name")
         elif column == "path":
             field_set.add("dir_type")
-            field_set.add("parent")
-            field_set.add("name")
+            field_set.add("path")
         elif column == "size":
             field_set.add("size")
         elif column == "type":
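With the name column gone, catalog.py derives display names from path at read time; the substitution is equivalent to:

    import posixpath

    row = {"path": "animals/dogs/dog.jpg"}
    print(posixpath.basename(row["path"]) or "")  # dog.jpg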
datachain/client/azure.py
CHANGED

@@ -1,4 +1,3 @@
-import posixpath
 from typing import Any

 from adlfs import AzureBlobFileSystem
@@ -14,16 +13,10 @@ class AzureClient(Client):
     PREFIX = "az://"
     protocol = "az"

-    def convert_info(self, v: dict[str, Any], parent: str) -> Entry:
+    def convert_info(self, v: dict[str, Any], path: str) -> Entry:
         version_id = v.get("version_id")
-        name = v.get("name", "").split(DELIMITER)[-1]
-        if version_id:
-            version_suffix = f"?versionid={version_id}"
-            if name.endswith(version_suffix):
-                name = name[: -len(version_suffix)]
         return Entry.from_file(
-            parent=parent,
-            name=name,
+            path=path,
             etag=v.get("etag", "").strip('"'),
             version=version_id or "",
             is_latest=version_id is None or bool(v.get("is_current_version")),
@@ -50,9 +43,9 @@ class AzureClient(Client):
             if not self._is_valid_key(b["name"]):
                 continue
             info = (await self.fs._details([b]))[0]
-            …
-            …
-            …
+            entries.append(
+                self.convert_info(info, self.rel_path(info["name"]))
+            )
             if entries:
                 await result_queue.put(entries)
                 pbar.update(len(entries))
datachain/client/fsspec.py
CHANGED

@@ -277,7 +277,7 @@ class Client(ABC):
             if info["type"] == "directory":
                 subdirs.add(subprefix)
             else:
-                files.append(self.convert_info(info, …))
+                files.append(self.convert_info(info, subprefix))
         if files:
             await result_queue.put(files)
         found_count = len(subdirs) + len(files)
@@ -360,12 +360,11 @@ class Client(ABC):

         parent_uid = UniqueId(
             parent["source"],
-            parent["parent"],
-            parent["name"],
-            parent["etag"],
+            parent["path"],
             parent["size"],
-            parent["vtype"],
-            parent["location"],
+            parent["etag"],
+            vtype=parent["vtype"],
+            location=parent["location"],
         )
         f = self.open_object(parent_uid, use_cache=use_cache)
         return FileSlice(f, offset, size, posixpath.basename(uid.path))
datachain/client/gcs.py
CHANGED

@@ -1,7 +1,6 @@
 import asyncio
 import json
 import os
-import posixpath
 from collections.abc import Iterable
 from datetime import datetime
 from typing import Any, Optional, cast
@@ -110,20 +109,11 @@ class GCSClient(Client):

     def _entry_from_dict(self, d: dict[str, Any]) -> Entry:
         info = self.fs._process_object(self.name, d)
-        …
-        …
-        …
-        return self.convert_info(info, parent)
-
-    def convert_info(self, v: dict[str, Any], parent: str) -> Entry:
-        name = v.get("name", "").split(DELIMITER)[-1]
-        if "generation" in v:
-            gen = f"#{v['generation']}"
-            if name.endswith(gen):
-                name = name[: -len(gen)]
+        return self.convert_info(info, self.rel_path(info["name"]))
+
+    def convert_info(self, v: dict[str, Any], path: str) -> Entry:
         return Entry.from_file(
-            parent=parent,
-            name=name,
+            path=path,
             etag=v.get("etag", ""),
             version=v.get("generation", ""),
             is_latest=not v.get("timeDeleted"),
datachain/client/local.py
CHANGED

@@ -140,11 +140,9 @@ class FileClient(Client):
             full_path += "/"
         return full_path

-    def convert_info(self, v: dict[str, Any], parent: str) -> Entry:
-        name = posixpath.basename(v["name"])
+    def convert_info(self, v: dict[str, Any], path: str) -> Entry:
         return Entry.from_file(
-            parent=parent,
-            name=name,
+            path=path,
             etag=v["mtime"].hex(),
             is_latest=True,
             last_modified=datetime.fromtimestamp(v["mtime"], timezone.utc),
datachain/client/s3.py
CHANGED

@@ -1,5 +1,4 @@
 import asyncio
-import posixpath
 from typing import Any, cast

 from botocore.exceptions import NoCredentialsError
@@ -112,10 +111,8 @@ class ClientS3(Client):
         await self._fetch_flat(start_prefix, result_queue)

     def _entry_from_boto(self, v, bucket, versions=False):
-        parent, name = posixpath.split(v["Key"])
         return Entry.from_file(
-            parent=parent,
-            name=name,
+            path=v["Key"],
             etag=v.get("ETag", "").strip('"'),
             version=ClientS3.clean_s3_version(v.get("VersionId", "")),
             is_latest=v.get("IsLatest", True),
@@ -145,7 +142,7 @@ class ClientS3(Client):
             if info["type"] == "directory":
                 subdirs.add(subprefix)
             else:
-                files.append(self.convert_info(info, …))
+                files.append(self.convert_info(info, subprefix))
             pbar.update()
             found = True
         if not found:
@@ -159,10 +156,9 @@ class ClientS3(Client):
     def clean_s3_version(ver):
         return ver if ver != "null" else ""

-    def convert_info(self, v: dict[str, Any], parent: str) -> Entry:
+    def convert_info(self, v: dict[str, Any], path: str) -> Entry:
         return Entry.from_file(
-            parent=parent,
-            name=v.get("Key", "").split(DELIMITER)[-1],
+            path=path,
             etag=v.get("ETag", "").strip('"'),
             version=ClientS3.clean_s3_version(v.get("VersionId", "")),
             is_latest=v.get("IsLatest", True),
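Across the client modules above (azure, gcs, local, s3), convert_info now receives the full bucket-relative path instead of a parent directory, so the per-provider name trimming (version suffixes, generation markers, posixpath.split) disappears. A hedged sketch of the shared shape, with a plain dict standing in for Entry.from_file:

    # Stand-in for Entry.from_file(path=path, ...); field names follow the diff.
    def convert_info(v: dict, path: str) -> dict:
        return {"path": path, "etag": v.get("ETag", "").strip('"')}

    print(convert_info({"ETag": '"abc"'}, "animals/dogs/dog.jpg"))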
datachain/data_storage/schema.py
CHANGED

@@ -80,8 +80,7 @@ class DirExpansion:
             q.c.vtype,
             (q.c.dir_type == DirType.DIR).label("is_dir"),
             q.c.source,
-            q.c.parent,
-            q.c.name,
+            q.c.path,
             q.c.version,
             q.c.location,
         )
@@ -94,36 +93,29 @@ class DirExpansion:
             q.c.vtype,
             q.c.is_dir,
             q.c.source,
-            q.c.parent,
-            q.c.name,
+            q.c.path,
             q.c.version,
             f.max(q.c.location).label("location"),
         )
         .select_from(q)
-        .group_by(
-            q.c.source, q.c.parent, q.c.name, q.c.vtype, q.c.is_dir, q.c.version
-        )
-        .order_by(
-            q.c.source, q.c.parent, q.c.name, q.c.vtype, q.c.is_dir, q.c.version
-        )
+        .group_by(q.c.source, q.c.path, q.c.vtype, q.c.is_dir, q.c.version)
+        .order_by(q.c.source, q.c.path, q.c.vtype, q.c.is_dir, q.c.version)
     )

     @classmethod
     def query(cls, q):
         q = cls.base_select(q).cte(recursive=True)
-        …
-        parent_name = path.name(q.c.parent)
+        parent = path.parent(q.c.path)
         q = q.union_all(
             sa.select(
                 sa.literal(-1).label("sys__id"),
                 sa.literal("").label("vtype"),
                 true().label("is_dir"),
                 q.c.source,
-                …
-                parent_name.label("name"),
+                parent.label("path"),
                 sa.literal("").label("version"),
                 null().label("location"),
-            ).where(…)
+            ).where(parent != "")
         )
         return cls.apply_group_by(q)
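DirExpansion's recursive CTE now walks a single path column, emitting one synthetic directory row per ancestor until path.parent yields "". In plain Python the expansion is equivalent to:

    import posixpath

    def ancestors(path: str):
        # mirrors the recursive union: take the parent until it is ""
        parent = posixpath.dirname(path)
        while parent:
            yield parent
            parent = posixpath.dirname(parent)

    print(list(ancestors("a/b/c/d.jpg")))  # ['a/b/c', 'a/b', 'a']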
datachain/data_storage/warehouse.py
CHANGED

@@ -19,6 +19,7 @@ from datachain.client import Client
 from datachain.data_storage.serializer import Serializable
 from datachain.dataset import DatasetRecord, RowDict
 from datachain.node import DirType, DirTypeGroup, Entry, Node, NodeWithPath, get_path
+from datachain.sql.functions import path as pathfunc
 from datachain.sql.types import Int, SQLType
 from datachain.storage import StorageURI
 from datachain.utils import sql_escape_like
@@ -373,9 +374,7 @@ class AbstractWarehouse(ABC, Serializable):

         else:
             parent = self.get_node_by_path(dr, path.lstrip("/").rstrip("/*"))
-            select_query = select_query.where(
-                (dr.c.parent == parent.path) | (self.path_expr(dr) == path)
-            )
+            select_query = select_query.where(pathfunc.parent(dr.c.path) == parent.path)
         return select_query

     def rename_dataset_table(
@@ -532,8 +531,8 @@ class AbstractWarehouse(ABC, Serializable):
             dr,
             parent_path,
             type="dir",
-            conds=[sa.Column("parent") == parent_path],
-            order_by=["source", "parent", "name"],
+            conds=[pathfunc.parent(sa.Column("path")) == parent_path],
+            order_by=["source", "path"],
         )
         return self.get_nodes(query)
@@ -556,7 +555,7 @@ class AbstractWarehouse(ABC, Serializable):
             & ~self.instr(relpath, "/")
             & (self.path_expr(de) != dirpath)
         )
-        .order_by(de.c.source, de.c.parent, de.c.name, de.c.version)
+        .order_by(de.c.source, de.c.path, de.c.version)
     )

     def _get_node_by_path_list(
@@ -572,8 +571,8 @@ class AbstractWarehouse(ABC, Serializable):
         ).subquery()
         query = self.expand_query(de, dr)

-        q = query.where(…).order_by(
-            de.c.source, de.c.parent, de.c.name, de.c.version
+        q = query.where(de.c.path == get_path(parent, name)).order_by(
+            de.c.source, de.c.path, de.c.version
         )
         row = next(self.dataset_rows_select(q), None)
         if not row:
@@ -636,8 +635,7 @@ class AbstractWarehouse(ABC, Serializable):
             case((de.c.is_dir == true(), DirType.DIR), else_=dr.c.dir_type).label(
                 "dir_type"
             ),
-            de.c.parent,
-            de.c.name,
+            de.c.path,
             with_default(dr.c.etag),
             de.c.version,
             with_default(dr.c.is_latest),
@@ -670,7 +668,7 @@ class AbstractWarehouse(ABC, Serializable):
         .where(
             dr.c.is_latest == true(),
             dr.c.dir_type != DirType.DIR,
-            …
+            dr.c.path.startswith(path),
         )
         .exists()
     )
@@ -678,8 +676,7 @@ class AbstractWarehouse(ABC, Serializable):
         if not row:
             raise FileNotFoundError(f"Unable to resolve path {path}")
         path = path.removesuffix("/")
-        …
-        return Node.from_dir(parent, name)
+        return Node.from_dir(path)

     def expand_path(self, dataset_rows: "DataTable", path: str) -> list[Node]:
         """Simulates Unix-like shell expansion"""
@@ -703,18 +700,21 @@ class AbstractWarehouse(ABC, Serializable):
         de = dr.dataset_dir_expansion(
             dr.select().where(dr.c.is_latest == true()).subquery()
         ).subquery()
-        where_cond = de.c.parent == parent_path
+        where_cond = pathfunc.parent(de.c.path) == parent_path
         if parent_path == "":
             # Exclude the root dir
-            where_cond = where_cond & (de.c.name != "")
+            where_cond = where_cond & (de.c.path != "")
         inner_query = self.expand_query(de, dr).where(where_cond).subquery()
+
+        def field_to_expr(f):
+            if f == "name":
+                return pathfunc.name(inner_query.c.path)
+            return getattr(inner_query.c, f)
+
         return self.db.execute(
-            select(…)
-            .select_from(inner_query)
-            .order_by(
+            select(*(field_to_expr(f) for f in fields)).order_by(
                 inner_query.c.source,
-                inner_query.c.parent,
-                inner_query.c.name,
+                inner_query.c.path,
                 inner_query.c.version,
             )
         )
@@ -727,21 +727,20 @@ class AbstractWarehouse(ABC, Serializable):
         """
         dr = dataset_rows
         dirpath = f"{parent_path}/"
-        relpath = func.substr(self.path_expr(dr), len(dirpath) + 1)

         def field_to_expr(f):
             if f == "name":
-                return dr.c.name
+                return pathfunc.name(dr.c.path)
             return getattr(dr.c, f)

         q = (
             select(*(field_to_expr(f) for f in fields))
             .where(
                 self.path_expr(dr).like(f"{sql_escape_like(dirpath)}%"),
-                ~self.instr(relpath, "/"),
+                ~self.instr(pathfunc.name(dr.c.path), "/"),
                 dr.c.is_latest == true(),
             )
-            .order_by(dr.c.source, dr.c.parent, dr.c.name, dr.c.version, dr.c.etag)
+            .order_by(dr.c.source, dr.c.path, dr.c.version, dr.c.etag)
         )
         return self.db.execute(q)
@@ -758,7 +757,7 @@ class AbstractWarehouse(ABC, Serializable):
         if isinstance(node, dict):
             is_dir = node.get("is_dir", node["dir_type"] in DirTypeGroup.SUBOBJ_DIR)
             node_size = node["size"]
-            path = get_path(node["parent"], node["name"])
+            path = node["path"]
         else:
             is_dir = node.is_container
             node_size = node.size
@@ -790,7 +789,7 @@ class AbstractWarehouse(ABC, Serializable):
         return results[0] or 0, 0

     def path_expr(self, t):
-        return …
+        return t.c.path

     def _find_query(
         self,
@@ -947,11 +946,7 @@ class AbstractWarehouse(ABC, Serializable):
         tq = target_query.alias("target_query")

         source_target_join = sa.join(
-            sq,
-            tq,
-            (sq.c.source == tq.c.source)
-            & (sq.c.parent == tq.c.parent)
-            & (sq.c.name == tq.c.name),
+            sq, tq, (sq.c.source == tq.c.source) & (sq.c.path == tq.c.path)
         )

         return (
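Directory membership is now expressed as pathfunc.parent(path) == parent_path instead of comparing a stored parent column. A sketch of the resulting predicate, using sa.func.parent as a stand-in for datachain's path.parent GenericFunction:

    import sqlalchemy as sa

    nodes = sa.table("nodes", sa.column("path"))
    # stand-in for datachain.sql.functions.path.parent
    cond = sa.func.parent(nodes.c.path) == "animals/dogs"
    print(sa.select(nodes.c.path).where(cond))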
datachain/lib/convert/sql_to_python.py
CHANGED

@@ -1,23 +1,18 @@
-from …
+from decimal import Decimal
 from typing import Any

-from sqlalchemy import …
+from sqlalchemy import ColumnElement

-from datachain.data_storage.sqlite import Column

-SQL_TO_PYTHON = {
-    …
-}
+def sql_to_python(args_map: dict[str, ColumnElement]) -> dict[str, Any]:
+    res = {}
+    for name, sql_exp in args_map.items():
+        try:
+            type_ = sql_exp.type.python_type
+            if type_ == Decimal:
+                type_ = float
+        except NotImplementedError:
+            type_ = str
+        res[name] = type_

-
-def sql_to_python(args_map: dict[str, Column]) -> dict[str, Any]:
-    return {
-        k: SQL_TO_PYTHON.get(type(v.type), str)  # type: ignore[union-attr]
-        for k, v in args_map.items()
-    }
+    return res
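The static SQL_TO_PYTHON lookup is replaced by SQLAlchemy's own type introspection: python_type raises NotImplementedError for untyped expressions, which the new code maps to str, and Decimal is normalized to float. A quick check of the underlying behavior:

    import sqlalchemy as sa
    from sqlalchemy.sql.sqltypes import NullType

    print(sa.Integer().python_type)  # <class 'int'>
    try:
        NullType().python_type
    except NotImplementedError:
        print("untyped expression -> falls back to str")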
datachain/lib/dc.py
CHANGED

@@ -20,8 +20,10 @@ import pandas as pd
 import sqlalchemy
 from pydantic import BaseModel, create_model
 from sqlalchemy.sql.functions import GenericFunction
+from sqlalchemy.sql.sqltypes import NullType

 from datachain import DataModel
+from datachain.lib.convert.python_to_sql import python_to_sql
 from datachain.lib.convert.values_to_tuples import values_to_tuples
 from datachain.lib.data_model import DataType
 from datachain.lib.dataset_info import DatasetInfo
@@ -47,6 +49,7 @@ from datachain.query.dataset import (
     detach,
 )
 from datachain.query.schema import Column, DatasetRow
+from datachain.sql.functions import path as pathfunc
 from datachain.utils import inside_notebook

 if TYPE_CHECKING:
@@ -110,6 +113,11 @@ class DatasetMergeError(DataChainParamsError):  # noqa: D101
         super().__init__(f"Merge error on='{on_str}'{right_on_str}: {msg}")


+class DataChainColumnError(DataChainParamsError):  # noqa: D101
+    def __init__(self, col_name, msg):  # noqa: D107
+        super().__init__(f"Error for column {col_name}: {msg}")
+
+
 OutputType = Union[None, DataType, Sequence[str], dict[str, DataType]]


@@ -195,7 +203,7 @@ class DataChain(DatasetQuery):

     DEFAULT_FILE_RECORD: ClassVar[dict] = {
         "source": "",
-        "name": "",
+        "path": "",
         "vtype": "",
         "size": 0,
     }
@@ -225,6 +233,17 @@ class DataChain(DatasetQuery):
         """Get schema of the chain."""
         return self._effective_signals_schema.values

+    def column(self, name: str) -> Column:
+        """Returns Column instance with a type if name is found in current schema,
+        otherwise raises an exception.
+        """
+        name_path = name.split(".")
+        for path, type_, _, _ in self.signals_schema.get_flat_tree():
+            if path == name_path:
+                return Column(name, python_to_sql(type_))
+
+        raise ValueError(f"Column with name {name} not found in the schema")
+
     def print_schema(self) -> None:
         """Print schema of the chain."""
         self._effective_signals_schema.print_tree()
@@ -829,6 +848,12 @@ class DataChain(DatasetQuery):
         )
         ```
         """
+        for col_name, expr in kwargs.items():
+            if not isinstance(expr, Column) and isinstance(expr.type, NullType):
+                raise DataChainColumnError(
+                    col_name, f"Cannot infer type with expression {expr}"
+                )
+
         mutated = {}
         schema = self.signals_schema
         for name, value in kwargs.items():
@@ -1562,10 +1587,11 @@ class DataChain(DatasetQuery):
         use_cache: bool = True,
     ) -> None:
         """Method that exports all files from chain to some folder."""
-        if placement == "filename" and (
-            …
-        ):
-            raise ValueError("Files with the same name found")
+        if placement == "filename" and (
+            super().distinct(pathfunc.name(C(f"{signal}__path"))).count()
+            != self.count()
+        ):
+            raise ValueError("Files with the same name found")

         for file in self.collect(signal):
             file.export(output, placement, use_cache)  # type: ignore[union-attr]
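The new mutate() guard rejects expressions whose SQL type SQLAlchemy cannot infer (their .type is NullType). The condition it tests can be reproduced standalone:

    import sqlalchemy as sa
    from sqlalchemy.sql.sqltypes import NullType

    expr = sa.literal_column("a") + sa.literal_column("b")  # no type info
    # True here means mutate() would raise DataChainColumnError for this kwarg
    print(isinstance(expr.type, NullType))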
datachain/lib/file.py
CHANGED

@@ -6,7 +6,7 @@ from abc import ABC, abstractmethod
 from contextlib import contextmanager
 from datetime import datetime
 from io import BytesIO
-from pathlib import Path
+from pathlib import Path, PurePosixPath
 from typing import TYPE_CHECKING, Any, ClassVar, Literal, Optional, Union
 from urllib.parse import unquote, urlparse
 from urllib.request import url2pathname
@@ -111,8 +111,7 @@ class File(DataModel):
     """`DataModel` for reading binary files."""

     source: str = Field(default="")
-    parent: str = Field(default="")
-    name: str
+    path: str
     size: int = Field(default=0)
     version: str = Field(default="")
     etag: str = Field(default="")
@@ -123,8 +122,7 @@ class File(DataModel):

     _datachain_column_types: ClassVar[dict[str, Any]] = {
         "source": String,
-        "parent": String,
-        "name": String,
+        "path": String,
         "size": Int,
         "version": String,
         "etag": String,
@@ -136,8 +134,7 @@ class File(DataModel):

     _unique_id_keys: ClassVar[list[str]] = [
         "source",
-        "parent",
-        "name",
+        "path",
         "size",
         "etag",
         "version",
@@ -168,11 +165,9 @@ class File(DataModel):
     def validate_location(cls, v):
         return File._validate_dict(v)

-    @field_validator("…", mode="before")
+    @field_validator("path", mode="before")
     @classmethod
     def validate_path(cls, path):
-        if path == "":
-            return ""
         return Path(path).as_posix()
@@ -185,6 +180,14 @@ class File(DataModel):
         self._catalog = None
         self._caching_enabled = False

+    @property
+    def name(self):
+        return PurePosixPath(self.path).name
+
+    @property
+    def parent(self):
+        return str(PurePosixPath(self.path).parent)
+
     @contextmanager
     def open(self, mode: Literal["rb", "r"] = "rb"):
         """Open the file and return a file object."""
@@ -261,19 +264,19 @@ class File(DataModel):

     def get_file_suffix(self):
         """Returns last part of file name with `.`."""
-        return …
+        return PurePosixPath(self.path).suffix

     def get_file_ext(self):
         """Returns last part of file name without `.`."""
-        return …
+        return PurePosixPath(self.path).suffix.strip(".")

     def get_file_stem(self):
         """Returns file name without extension."""
-        return …
+        return PurePosixPath(self.path).stem

     def get_full_name(self):
         """Returns name with parent directories."""
-        return f"{self.parent}/{self.name}" if self.parent else self.name
+        return self.path

     def get_uri(self):
         """Returns file URI."""
@@ -355,8 +358,7 @@ def get_file(type_: Literal["binary", "text", "image"] = "binary"):

     def get_file_type(
         source: str,
-        parent: str,
-        name: str,
+        path: str,
         size: int,
         version: str,
         etag: str,
@@ -367,8 +369,7 @@ def get_file(type_: Literal["binary", "text", "image"] = "binary"):
     ) -> file:  # type: ignore[valid-type]
         return file(
             source=source,
-            parent=parent,
-            name=name,
+            path=path,
             size=size,
             version=version,
             etag=etag,
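name, parent, and the get_file_* helpers are now all derived from the single path field via PurePosixPath; the equivalent standalone behavior:

    from pathlib import PurePosixPath

    p = PurePosixPath("animals/dogs/dog.jpg")
    print(p.name)               # dog.jpg      (File.name)
    print(str(p.parent))        # animals/dogs (File.parent)
    print(p.suffix)             # .jpg         (get_file_suffix)
    print(p.suffix.strip("."))  # jpg          (get_file_ext)
    print(p.stem)               # dog          (get_file_stem)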
datachain/lib/webdataset.py
CHANGED

@@ -119,7 +119,7 @@ class Builder:
         return self._tar.extractfile(item).read().decode(self._encoding)

     def add(self, file: tarfile.TarInfo):
-        fstream = File(name=file.name)
+        fstream = File(path=file.name)
         ext = fstream.get_file_ext()
         stem = fstream.get_file_stem()

@@ -176,9 +176,8 @@ class Builder:
         )
         etag = hashlib.md5(etag_string.encode(), usedforsecurity=False).hexdigest()
         return File(
-            name=core_file.name,
             source=self._tar_stream.source,
-            parent=new_parent,
+            path=f"{new_parent}/{core_file.name}",
             version=self._tar_stream.version,
             size=core_file.size,
             etag=etag,
datachain/listing.py
CHANGED

@@ -5,11 +5,12 @@ from itertools import zip_longest
 from typing import TYPE_CHECKING, Optional

 from fsspec.asyn import get_loop, sync
-from sqlalchemy import Column, case
+from sqlalchemy import Column
 from sqlalchemy.sql import func
 from tqdm import tqdm

 from datachain.node import DirType, Entry, Node, NodeWithPath
+from datachain.sql.functions import path as pathfunc
 from datachain.utils import suffix_to_number

 if TYPE_CHECKING:
@@ -129,7 +130,7 @@ class Listing:
         dir_path = []
         if not copy_dir_contents:
             dir_path.append(node.name)
-        subtree_nodes = src.find(sort=["parent", "name"])
+        subtree_nodes = src.find(sort=["path"])
         all_nodes.extend(
             NodeWithPath(n.n, path=dir_path + n.path) for n in subtree_nodes
         )
@@ -148,8 +149,7 @@ class Listing:
         elif from_dataset:
             node_path = [
                 src.listing.client.name,
-                node.parent,
-                node.name,
+                node.path,
             ]
         else:
             node_path = [node.name]
@@ -201,25 +201,19 @@ class Listing:
         dr = self.dataset_rows
         conds = []
         if names:
-            f = Column("name").op("GLOB")
-            conds.extend(f(name) for name in names)
+            for name in names:
+                conds.append(pathfunc.name(Column("path")).op("GLOB")(name))
         if inames:
-            f = func.lower(Column("name")).op("GLOB")
-            conds.extend(f(iname.lower()) for iname in inames)
+            for iname in inames:
+                conds.append(
+                    func.lower(pathfunc.name(Column("path"))).op("GLOB")(iname.lower())
+                )
         if paths:
-            node_path = case(
-                (Column("parent") == "", Column("name")),
-                else_=Column("parent") + "/" + Column("name"),
-            )
-            f = node_path.op("GLOB")
-            conds.extend(f(path) for path in paths)
+            for path in paths:
+                conds.append(Column("path").op("GLOB")(path))
         if ipaths:
-            node_path = case(
-                (Column("parent") == "", Column("name")),
-                else_=Column("parent") + "/" + Column("name"),
-            )
-            f = func.lower(node_path).op("GLOB")
-            conds.extend(f(ipath.lower()) for ipath in ipaths)
+            for ipath in ipaths:
+                conds.append(func.lower(Column("path")).op("GLOB")(ipath.lower()))

         if size is not None:
             size_limit = suffix_to_number(size)
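Name and path filters now GLOB-match against the single path column (or its basename for name filters) rather than a reconstructed parent/name expression. Roughly, in Python terms:

    import posixpath
    from fnmatch import fnmatchcase  # close analogue of SQLite GLOB (case-sensitive)

    rows = ["animals/dogs/dog.jpg", "animals/cats/cat.jpg"]
    print([p for p in rows if fnmatchcase(posixpath.basename(p), "dog.*")])
    print([p for p in rows if fnmatchcase(p, "animals/*/cat.jpg")])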
datachain/node.py
CHANGED

@@ -50,8 +50,7 @@ class Node:
     sys__rand: int = -1
     vtype: str = ""
     dir_type: Optional[int] = None
-    parent: str = ""
-    name: str = ""
+    path: str = ""
     etag: str = ""
     version: Optional[str] = None
     is_latest: bool = True
@@ -62,10 +61,6 @@ class Node:
     location: Optional[str] = None
     source: StorageURI = StorageURI("")

-    @property
-    def path(self) -> str:
-        return f"{self.parent}/{self.name}" if self.parent else self.name
-
     @property
     def is_dir(self) -> bool:
         return self.dir_type == DirType.DIR
@@ -107,13 +102,12 @@ class Node:
             return self.path + "/"
         return self.path

-    def as_uid(self, storage: Optional[StorageURI] = None):
+    def as_uid(self, storage: Optional[StorageURI] = None) -> UniqueId:
         if storage is None:
             storage = self.source
         return UniqueId(
             storage=storage,
-            parent=self.parent,
-            name=self.name,
+            path=self.path,
             size=self.size,
             version=self.version or "",
             etag=self.etag,
@@ -129,20 +123,30 @@ class Node:
         return cls(**kw)

     @classmethod
-    def from_dir(cls, parent, name, **kwargs) -> "Node":
-        return cls(sys__id=-1, dir_type=DirType.DIR, parent=parent, name=name, **kwargs)
+    def from_dir(cls, path, **kwargs) -> "Node":
+        return cls(sys__id=-1, dir_type=DirType.DIR, path=path, **kwargs)

     @classmethod
     def root(cls) -> "Node":
         return cls(sys__id=-1, dir_type=DirType.DIR)

+    @property
+    def name(self):
+        return self.path.rsplit("/", 1)[-1]
+
+    @property
+    def parent(self):
+        split = self.path.rsplit("/", 1)
+        if len(split) <= 1:
+            return ""
+        return split[0]
+

 @attrs.define
 class Entry:
     vtype: str = ""
     dir_type: Optional[int] = None
-    parent: str = ""
-    name: str = ""
+    path: str = ""
     etag: str = ""
     version: str = ""
     is_latest: bool = True
@@ -157,27 +161,34 @@ class Entry:
         return self.dir_type == DirType.DIR

     @classmethod
-    def from_dir(cls, parent: str, name: str, **kwargs) -> "Entry":
-        return cls(dir_type=DirType.DIR, parent=parent, name=name, **kwargs)
+    def from_dir(cls, path: str, **kwargs) -> "Entry":
+        return cls(dir_type=DirType.DIR, path=path, **kwargs)

     @classmethod
-    def from_file(cls, parent: str, name: str, **kwargs) -> "Entry":
-        return cls(dir_type=DirType.FILE, parent=parent, name=name, **kwargs)
+    def from_file(cls, path: str, **kwargs) -> "Entry":
+        return cls(dir_type=DirType.FILE, path=path, **kwargs)

     @classmethod
     def root(cls):
         return cls(dir_type=DirType.DIR)

-    @property
-    def path(self) -> str:
-        return f"{self.parent}/{self.name}" if self.parent else self.name
-
     @property
     def full_path(self) -> str:
         if self.is_dir and self.path:
             return self.path + "/"
         return self.path

+    @property
+    def name(self):
+        return self.path.rsplit("/", 1)[-1]
+
+    @property
+    def parent(self):
+        split = self.path.rsplit("/", 1)
+        if len(split) <= 1:
+            return ""
+        return split[0]
+

 def get_path(parent: str, name: str):
     return f"{parent}/{name}" if parent else name
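The new name/parent properties on Node and Entry are a plain rsplit on the stored path; the len(split) <= 1 guard makes parent return "" for top-level entries:

    def parent(path: str) -> str:
        split = path.rsplit("/", 1)
        return "" if len(split) <= 1 else split[0]

    print(parent("a/b/c.txt"))             # a/b
    print(parent("c.txt"))                 # "" (top-level)
    print("a/b/c.txt".rsplit("/", 1)[-1])  # c.txt (the name property)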
datachain/query/builtins.py
CHANGED

@@ -20,8 +20,7 @@ def load_tar(raw):
 @udf(
     (
         C.source,
-        C.name,
-        C.parent,
+        C.path,
         C.size,
         C.vtype,
         C.dir_type,
@@ -37,8 +36,7 @@ def load_tar(raw):
 )
 def index_tar(
     source,
-    name,
-    parent,
+    parent_path,
     size,
     vtype,
     dir_type,
@@ -52,9 +50,8 @@ def index_tar(
 ):
     # generate original tar files as well, along with subobjects
     yield DatasetRow.create(
-        name,
         source=source,
-        parent=parent,
+        path=parent_path,
         size=size,
         vtype=vtype,
         dir_type=dir_type,
@@ -66,15 +63,12 @@ def index_tar(
         etag=etag,
     )

-    parent_path = name if not parent else f"{parent}/{name}"
     for info in tar_entries:
         if info.isfile():
             full_path = f"{parent_path}/{info.name}"
-            parent_dir, subobject_name = full_path.rsplit("/", 1)
             yield DatasetRow.create(
-                subobject_name,
                 source=source,
-                parent=parent_dir,
+                path=full_path,
                 size=info.size,
                 vtype="tar",
                 location={
@@ -83,8 +77,7 @@ def index_tar(
                     "size": info.size,
                     "parent": {
                         "source": source,
-                        "parent": parent,
-                        "name": name,
+                        "path": parent_path,
                         "version": version,
                         "size": size,
                         "etag": etag,
datachain/query/dataset.py
CHANGED

@@ -307,7 +307,7 @@ class Subtract(DatasetDiffOperation):
 class Changed(DatasetDiffOperation):
     """
     Calculates rows that are changed in a source query compared to target query
-    Changed means it has same source + parent + name but different last_modified
+    Changed means it has same source + path but different last_modified
     Example:
     >>> ds = DatasetQuery(name="dogs_cats")  # some older dataset with embeddings
     >>> ds_updated = (
@@ -1526,7 +1526,7 @@ class DatasetQuery:

     @detach
     def subtract(self, dq: "DatasetQuery") -> "Self":
-        return self._subtract(dq, on=["source", "parent", "name"])
+        return self._subtract(dq, on=["source", "path"])

     @detach
     def _subtract(self, dq: "DatasetQuery", on: Sequence[str]) -> "Self":
datachain/query/schema.py
CHANGED

@@ -215,8 +215,7 @@ def normalize_param(param: UDFParamSpec) -> UDFParameter:
 class DatasetRow:
     schema: ClassVar[dict[str, type[SQLType]]] = {
         "source": String,
-        "parent": String,
-        "name": String,
+        "path": String,
         "size": Int64,
         "location": JSON,
         "vtype": String,
@@ -231,9 +230,8 @@ class DatasetRow:

     @staticmethod
     def create(
-        name: str,
+        path: str,
         source: str = "",
-        parent: str = "",
         size: int = 0,
         location: Optional[dict[str, Any]] = None,
         vtype: str = "",
@@ -245,7 +243,6 @@ class DatasetRow:
         version: str = "",
         etag: str = "",
     ) -> tuple[
-        str,
         str,
         str,
         int,
@@ -267,8 +264,7 @@ class DatasetRow:

         return (  # type: ignore [return-value]
             source,
-            parent,
-            name,
+            path,
             size,
             location,
             vtype,

datachain/sql/functions/__init__.py
CHANGED

@@ -1,16 +1,17 @@
 from sqlalchemy.sql.expression import func

-from . import path, string
+from . import array, path, string
+from .array import avg
 from .conditional import greatest, least
 from .random import rand

 count = func.count
 sum = func.sum
-avg = func.avg
 min = func.min
 max = func.max

 __all__ = [
+    "array",
     "avg",
     "count",
     "func",
datachain/sql/functions/array.py
CHANGED

@@ -44,7 +44,15 @@ class sip_hash_64(GenericFunction):  # noqa: N801
     inherit_cache = True


+class avg(GenericFunction):  # noqa: N801
+    type = Float()
+    package = "array"
+    name = "avg"
+    inherit_cache = True
+
+
 compiler_not_implemented(cosine_distance)
 compiler_not_implemented(euclidean_distance)
 compiler_not_implemented(length)
 compiler_not_implemented(sip_hash_64)
+compiler_not_implemented(avg)
datachain/sql/sqlite/base.py
CHANGED

@@ -78,6 +78,7 @@ def setup():
     compiles(conditional.least, "sqlite")(compile_least)
     compiles(Values, "sqlite")(compile_values)
     compiles(random.rand, "sqlite")(compile_rand)
+    compiles(array.avg, "sqlite")(compile_avg)

     if load_usearch_extension(sqlite3.connect(":memory:")):
         compiles(array.cosine_distance, "sqlite")(compile_cosine_distance_ext)
@@ -349,6 +350,10 @@ def compile_rand(element, compiler, **kwargs):
     return compiler.process(func.random(), **kwargs)


+def compile_avg(element, compiler, **kwargs):
+    return compiler.process(func.avg(*element.clauses.clauses), **kwargs)
+
+
 def load_usearch_extension(conn) -> bool:
     try:
         # usearch is part of the vector optional dependencies
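array.avg is a GenericFunction that each dialect compiles; on SQLite it lowers to the built-in avg(). The same mechanism can be reproduced standalone with SQLAlchemy's @compiles hook (my_avg is a hypothetical stand-in mirroring compile_avg above):

    import sqlalchemy as sa
    from sqlalchemy.dialects import sqlite
    from sqlalchemy.ext.compiler import compiles
    from sqlalchemy.sql.functions import GenericFunction

    class my_avg(GenericFunction):  # noqa: N801
        type = sa.Float()
        inherit_cache = True

    @compiles(my_avg, "sqlite")
    def compile_my_avg(element, compiler, **kwargs):
        # delegate to SQLite's built-in avg()
        return compiler.process(sa.func.avg(*element.clauses.clauses), **kwargs)

    stmt = sa.select(my_avg(sa.column("x")))
    print(stmt.compile(dialect=sqlite.dialect()))  # SELECT avg(x) AS my_avg_1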
{datachain-0.2.17.dist-info → datachain-0.3.0.dist-info}/RECORD
CHANGED

@@ -1,15 +1,15 @@
 datachain/__init__.py,sha256=GeyhE-5LgfJav2OKYGaieP2lBvf2Gm-ihj7thnK9zjI,800
 datachain/__main__.py,sha256=hG3Y4ARGEqe1AWwNMd259rBlqtphx1Wk39YbueQ0yV8,91
 datachain/asyn.py,sha256=CKCFQJ0CbB3r04S7mUTXxriKzPnOvdUaVPXjM8vCtJw,7644
-datachain/cache.py,sha256=…
+datachain/cache.py,sha256=wznC2pge6RhlPTaJfBVGjmBc6bxWCPThu4aTFMltvFU,4076
 datachain/cli.py,sha256=DbmI1sXs7-KCQz6RdLE_JAp3XO3yrTSRJ71LdUzx-XE,33099
 datachain/cli_utils.py,sha256=jrn9ejGXjybeO1ur3fjdSiAyCHZrX0qsLLbJzN9ErPM,2418
 datachain/config.py,sha256=PfC7W5yO6HFO6-iMB4YB-0RR88LPiGmD6sS_SfVbGso,1979
 datachain/dataset.py,sha256=MZezyuJWNj_3PEtzr0epPMNyWAOTrhTSPI5FmemV6L4,14470
 datachain/error.py,sha256=GY9KYTmb7GHXn2gGHV9X-PBhgwLj3i7VpK7tGHtAoGM,1279
 datachain/job.py,sha256=bk25bIqClhgRPzlXAhxpTtDeewibQe5l3S8Cf7db0gM,1229
-datachain/listing.py,sha256=…
-datachain/node.py,sha256=…
+datachain/listing.py,sha256=keLkvPfumDA3gijeIiinH5yGWe71qCxgF5HqqP5AeH4,8299
+datachain/node.py,sha256=frxZWoEvqUvk9pyXmVaeiNCs3W-xjC_sENmUD11V06Q,6006
 datachain/nodes_fetcher.py,sha256=kca19yvu11JxoVY1t4_ydp1FmchiV88GnNicNBQ9NIA,831
 datachain/nodes_thread_pool.py,sha256=ZyzBvUImIPmi4WlKC2SW2msA0UhtembbTdcs2nx29A0,3191
 datachain/progress.py,sha256=7_8FtJs770ITK9sMq-Lt4k4k18QmYl4yIG_kCoWID3o,4559
@@ -17,33 +17,33 @@ datachain/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datachain/storage.py,sha256=RiSJLYdHUjnrEWkLBKPcETHpAxld_B2WxLg711t0aZI,3733
 datachain/utils.py,sha256=kgH5NPj47eC_KrFTd6ZS206lKVhnJVFt5XsqkK6ppTc,12483
 datachain/catalog/__init__.py,sha256=g2iAAFx_gEIrqshXlhSEbrc8qDaEH11cjU40n3CHDz4,409
-datachain/catalog/catalog.py,sha256=…
+datachain/catalog/catalog.py,sha256=BJ8ZP9mleUbN5Y4CoYJ94R_tnnsA9sHdZq2RBGwVN5Y,80291
 datachain/catalog/datasource.py,sha256=D-VWIVDCM10A8sQavLhRXdYSCG7F4o4ifswEF80_NAQ,1412
 datachain/catalog/loader.py,sha256=GJ8zhEYkC7TuaPzCsjJQ4LtTdECu-wwYzC12MikPOMQ,7307
 datachain/catalog/subclass.py,sha256=B5R0qxeTYEyVAAPM1RutBPSoXZc8L5mVVZeSGXki9Sw,2096
 datachain/client/__init__.py,sha256=T4wiYL9KIM0ZZ_UqIyzV8_ufzYlewmizlV4iymHNluE,86
-datachain/client/azure.py,sha256=…
+datachain/client/azure.py,sha256=3RfDTAI_TszDy9WazHQd3bI3sS2wDFrNXfNqCDewZgE,2214
 datachain/client/fileslice.py,sha256=bT7TYco1Qe3bqoc8aUkUZcPdPofJDHlryL5BsTn9xsY,3021
-datachain/client/fsspec.py,sha256=…
-datachain/client/gcs.py,sha256=…
-datachain/client/local.py,sha256=…
-datachain/client/s3.py,sha256=…
+datachain/client/fsspec.py,sha256=VrssoNenXsFxznr-Xx1haZPlXU-dr-WHdxmdbgFI_UA,13378
+datachain/client/gcs.py,sha256=Mt77W_l8_fK61gLm4mmxNmENuOM0ETwxdiFp4S8d-_w,4105
+datachain/client/local.py,sha256=yhC-pMKdprJ-rMGwPpBmPkdkG5riIIKkVSe6kNpyCok,5076
+datachain/client/s3.py,sha256=GfRZZzNPQPRsYjoef8bbsLbanJPUlCbyGTTK8ojzp8A,6136
 datachain/data_storage/__init__.py,sha256=cEOJpyu1JDZtfUupYucCDNFI6e5Wmp_Oyzq6rZv32Y8,398
 datachain/data_storage/db_engine.py,sha256=81Ol1of9TTTzD97ORajCnP366Xz2mEJt6C-kTUCaru4,3406
 datachain/data_storage/id_generator.py,sha256=lCEoU0BM37Ai2aRpSbwo5oQT0GqZnSpYwwvizathRMQ,4292
 datachain/data_storage/job.py,sha256=w-7spowjkOa1P5fUVtJou3OltT0L48P0RYWZ9rSJ9-s,383
 datachain/data_storage/metastore.py,sha256=nxcY6nwyEmQWMAo33sNGO-FgUFQs2amBGGnZz2ftEz0,55362
-datachain/data_storage/schema.py,sha256=…
+datachain/data_storage/schema.py,sha256=Idi-29fckvZozzvkyz3nTR2FOIajPlSuPdIEO7SMvXM,7863
 datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2krunWUum5o,927
 datachain/data_storage/sqlite.py,sha256=0r6L_a2hdGRoR_gl06v1qWhEFOS_Q31aldHyk07Yx-M,26857
-datachain/data_storage/warehouse.py,sha256=…
+datachain/data_storage/warehouse.py,sha256=eEZvzYwpqwzzLXqHWjB6l4tRsIHifIr8VWI5STm53LE,33310
 datachain/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datachain/lib/arrow.py,sha256=R8wDUDEa-5hYjI3HW9cqvOYYJpeeah5lbhFIL3gkmcE,4915
 datachain/lib/clip.py,sha256=16u4b_y2Y15nUS2UN_8ximMo6r_-_4IQpmct2ol-e-g,5730
 datachain/lib/data_model.py,sha256=qfTtQNncS5pt9SvXdMEa5kClniaT6XBGBfO7onEz2TI,1632
 datachain/lib/dataset_info.py,sha256=lONGr71ozo1DS4CQEhnpKORaU4qFb6Ketv8Xm8CVm2U,2188
-datachain/lib/dc.py,sha256=…
-datachain/lib/file.py,sha256=…
+datachain/lib/dc.py,sha256=bU45N7vBlxSyS6bpe0ShQ1c0DpXKFVfNcFcvbBrE1Ag,58011
+datachain/lib/file.py,sha256=ZHpdilDPYCob8uqtwUPtBvBNxVvQRq4AC_0IGg5m-G4,12003
 datachain/lib/image.py,sha256=TgYhRhzd4nkytfFMeykQkPyzqb5Le_-tU81unVMPn4Q,2328
 datachain/lib/meta_formats.py,sha256=jlSYWRUeDMjun_YCsQ2JxyaDJpEpokzHDPmKUAoCXnU,7034
 datachain/lib/model_store.py,sha256=c4USXsBBjrGH8VOh4seIgOiav-qHOwdoixtxfLgU63c,2409
@@ -55,22 +55,22 @@ datachain/lib/udf.py,sha256=IjuDt2B8E3xEHhcJnaK_ZhmivdrOYPXz5uf7ylpktws,11815
 datachain/lib/udf_signature.py,sha256=gMStcEeYJka5M6cg50Z9orC6y6HzCAJ3MkFqqn1fjZg,7137
 datachain/lib/utils.py,sha256=5-kJlAZE0D9nXXweAjo7-SP_AWGo28feaDByONYaooQ,463
 datachain/lib/vfile.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datachain/lib/webdataset.py,sha256=…
+datachain/lib/webdataset.py,sha256=SsjCKLSKEkHRRfeTHQhjoGqNPqIWw_SCWQcUwgUWWP0,8282
 datachain/lib/webdataset_laion.py,sha256=PQP6tQmUP7Xu9fPuAGK1JDBYA6T5UufYMUTGaxgspJA,2118
 datachain/lib/convert/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datachain/lib/convert/flatten.py,sha256=YMoC00BqEy3zSpvCp6Q0DfxihuPmgjUJj1g2cesWGPs,1790
 datachain/lib/convert/python_to_sql.py,sha256=4gplGlr_Kg-Z40OpJUzJiarDWj7pwbUOk-dPOYYCJ9Q,2629
-datachain/lib/convert/sql_to_python.py,sha256=…
+datachain/lib/convert/sql_to_python.py,sha256=lGnKzSF_tz9Y_5SSKkrIU95QEjpcDzvOxIRkEKTQag0,443
 datachain/lib/convert/unflatten.py,sha256=Ogvh_5wg2f38_At_1lN0D_e2uZOOpYEvwvB2xdq56Tw,2012
 datachain/lib/convert/values_to_tuples.py,sha256=aVoHWMOUGLAiS6_BBwKJqVIne91VffOW6-dWyNE7oHg,3715
 datachain/query/__init__.py,sha256=tv-spkjUCYamMN9ys_90scYrZ8kJ7C7d1MTYVmxGtk4,325
 datachain/query/batch.py,sha256=j-_ZcuQra2Ro3Wj4crtqQCg-7xuv-p84hr4QHdvT7as,3479
-datachain/query/builtins.py,sha256=…
-datachain/query/dataset.py,sha256=…
+datachain/query/builtins.py,sha256=EmKPYsoQ46zwdyOn54MuCzvYFmfsBn5F8zyF7UBUfrc,2550
+datachain/query/dataset.py,sha256=nfRRz6mkUz0tcD084rx-ps4PUWnZr5JQlIlRUF-PpSc,59919
 datachain/query/dispatch.py,sha256=oGX9ZuoKWPB_EyqAZD_eULcO3OejY44_keSmFS6SHT0,13315
 datachain/query/metrics.py,sha256=vsECqbZfoSDBnvC3GQlziKXmISVYDLgHP1fMPEOtKyo,640
 datachain/query/params.py,sha256=O_j89mjYRLOwWNhYZl-z7mi-rkdP7WyFmaDufsdTryE,863
-datachain/query/schema.py,sha256=…
+datachain/query/schema.py,sha256=O3mTM5DRjvRAJCI7O9mR8wOdFJbgI1jIjvtfl5YvjI4,7755
 datachain/query/session.py,sha256=qTzkXgwMJdJhal3rVt3hdv3x1EXT1IHuXcwkC-Ex0As,4111
 datachain/query/udf.py,sha256=c0IOTkcedpOQEmX-Idlrrl1__1IecNXL0N9oUO9Dtkg,7755
 datachain/remote/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -81,20 +81,20 @@ datachain/sql/types.py,sha256=SShudhdIpdfTKDxWDDqOajYRkTCkIgQbilA94g4i-4E,10389
 datachain/sql/utils.py,sha256=rzlJw08etivdrcuQPqNVvVWhuVSyUPUQEEc6DOhu258,818
 datachain/sql/default/__init__.py,sha256=XQ2cEZpzWiABqjV-6yYHUBGI9vN_UHxbxZENESmVAWw,45
 datachain/sql/default/base.py,sha256=h44005q3qtMc9cjWmRufWwcBr5CfK_dnvG4IrcSQs_8,536
-datachain/sql/functions/__init__.py,sha256=…
-datachain/sql/functions/array.py,sha256=…
+datachain/sql/functions/__init__.py,sha256=Ioyy7nSetrTLVnHGcGcmZU99HxUFcx-5PFbrh2dPNH0,396
+datachain/sql/functions/array.py,sha256=EB7nJSncUc1PuxlHyzU2gVhF8DuXaxpGlxb5e8X2KFY,1297
 datachain/sql/functions/conditional.py,sha256=q7YUKfunXeEldXaxgT-p5pUTcOEVU_tcQ2BJlquTRPs,207
 datachain/sql/functions/path.py,sha256=zixpERotTFP6LZ7I4TiGtyRA8kXOoZmH1yzH9oRW0mg,1294
 datachain/sql/functions/random.py,sha256=vBwEEj98VH4LjWixUCygQ5Bz1mv1nohsCG0-ZTELlVg,271
 datachain/sql/functions/string.py,sha256=hIrF1fTvlPamDtm8UMnWDcnGfbbjCsHxZXS30U2Rzxo,651
 datachain/sql/sqlite/__init__.py,sha256=TAdJX0Bg28XdqPO-QwUVKy8rg78cgMileHvMNot7d04,166
-datachain/sql/sqlite/base.py,sha256=…
+datachain/sql/sqlite/base.py,sha256=LBYmXqXsVF30fbcnR55evCZHbPDCzMdGk_ogPLps63s,12236
 datachain/sql/sqlite/types.py,sha256=yzvp0sXSEoEYXs6zaYC_2YubarQoZH-MiUNXcpuEP4s,1573
 datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR0,469
 datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
-datachain-0.2.17.dist-info/LICENSE,sha256=…
-datachain-0.2.17.dist-info/METADATA,sha256=…
-datachain-0.2.17.dist-info/WHEEL,sha256=…
-datachain-0.2.17.dist-info/entry_points.txt,sha256=…
-datachain-0.2.17.dist-info/top_level.txt,sha256=…
-datachain-0.2.17.dist-info/RECORD,,
+datachain-0.3.0.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
+datachain-0.3.0.dist-info/METADATA,sha256=x0jqtxoQE9ynjAAKFeyrz0rvyuv_E2e0D6UuhU3Yu_I,17268
+datachain-0.3.0.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
+datachain-0.3.0.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
+datachain-0.3.0.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
+datachain-0.3.0.dist-info/RECORD,,

LICENSE, WHEEL, entry_points.txt, and top_level.txt are unchanged between the two versions.