datachain 0.3.11__py3-none-any.whl → 0.3.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of datachain might be problematic.

datachain/cache.py CHANGED
@@ -29,7 +29,6 @@ class UniqueId:
     etag: str
     version: str = ""
     is_latest: bool = True
-    vtype: str = ""
     location: Optional[str] = None
     last_modified: datetime = TIME_ZERO

datachain/catalog/catalog.py CHANGED
@@ -12,7 +12,6 @@ import sys
 import time
 import traceback
 from collections.abc import Iterable, Iterator, Mapping, Sequence
-from contextlib import contextmanager, nullcontext
 from copy import copy
 from dataclasses import dataclass
 from functools import cached_property, reduce
@@ -23,7 +22,6 @@ from typing import (
     TYPE_CHECKING,
     Any,
     Callable,
-    NamedTuple,
     NoReturn,
     Optional,
     Union,
@@ -58,14 +56,13 @@ from datachain.error import (
     PendingIndexingError,
     QueryScriptCancelError,
     QueryScriptCompileError,
-    QueryScriptDatasetNotFound,
     QueryScriptRunError,
 )
 from datachain.listing import Listing
 from datachain.node import DirType, Node, NodeWithPath
 from datachain.nodes_thread_pool import NodesThreadPool
 from datachain.remote.studio import StudioClient
-from datachain.sql.types import JSON, Boolean, DateTime, Int, Int64, SQLType, String
+from datachain.sql.types import JSON, Boolean, DateTime, Int64, SQLType, String
 from datachain.storage import Storage, StorageStatus, StorageURI
 from datachain.utils import (
     DataChainDir,
@@ -115,44 +112,19 @@ def noop(_: str):
     pass


-@contextmanager
-def print_and_capture(
-    stream: "IO[bytes]|IO[str]", callback: Callable[[str], None] = noop
-) -> "Iterator[list[str]]":
-    lines: list[str] = []
-    append = lines.append
+def _process_stream(stream: "IO[bytes]", callback: Callable[[str], None]) -> None:
+    buffer = b""
+    while byt := stream.read(1):  # Read one byte at a time
+        buffer += byt

-    def loop() -> None:
-        buffer = b""
-        while byt := stream.read(1):  # Read one byte at a time
-            buffer += byt.encode("utf-8") if isinstance(byt, str) else byt
-
-            if byt in (b"\n", b"\r"):  # Check for newline or carriage return
-                line = buffer.decode("utf-8")
-                print(line, end="")
-                callback(line)
-                append(line)
-                buffer = b""  # Clear buffer for next line
-
-        if buffer:  # Handle any remaining data in the buffer
+        if byt in (b"\n", b"\r"):  # Check for newline or carriage return
             line = buffer.decode("utf-8")
-            print(line, end="")
             callback(line)
-            append(line)
-
-    thread = Thread(target=loop, daemon=True)
-    thread.start()
-
-    try:
-        yield lines
-    finally:
-        thread.join()
-
+            buffer = b""  # Clear buffer for next line

-class QueryResult(NamedTuple):
-    dataset: Optional[DatasetRecord]
-    version: Optional[int]
-    output: str
+    if buffer:  # Handle any remaining data in the buffer
+        line = buffer.decode("utf-8")
+        callback(line)


 class DatasetRowsFetcher(NodesThreadPool):
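
Note: the new `_process_stream` helper above replaces `print_and_capture`. It reads the child process's stdout one byte at a time and invokes the callback for every chunk terminated by a newline or carriage return, so carriage-return progress updates are forwarded as they arrive. A minimal, self-contained sketch of the same splitting rule (the `io.BytesIO` input is illustrative, not part of datachain):

    import io

    def process_stream(stream, callback):
        # Same rule as _process_stream: a "line" ends at b"\n" or b"\r".
        buffer = b""
        while byt := stream.read(1):  # read one byte at a time
            buffer += byt
            if byt in (b"\n", b"\r"):
                callback(buffer.decode("utf-8"))
                buffer = b""
        if buffer:  # flush whatever remains when the stream ends
            callback(buffer.decode("utf-8"))

    process_stream(io.BytesIO(b"epoch 1\repoch 2\ndone\n"), print)
    # the callback receives "epoch 1\r", "epoch 2\n", "done\n", in order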
@@ -541,8 +513,6 @@ def find_column_to_str(  # noqa: PLR0911
     )
     if column == "name":
         return posixpath.basename(row[field_lookup["path"]]) or ""
-    if column == "owner":
-        return row[field_lookup["owner_name"]] or ""
     if column == "path":
         is_dir = row[field_lookup["dir_type"]] == DirType.DIR
         path = row[field_lookup["path"]]
@@ -651,11 +621,6 @@ class Catalog:
         code_ast.body[-1:] = new_expressions
         return code_ast

-    def compile_query_script(self, script: str) -> str:
-        code_ast = ast.parse(script)
-        code_ast = self.attach_query_wrapper(code_ast)
-        return ast.unparse(code_ast)
-
     def parse_url(self, uri: str, **config: Any) -> tuple[Client, str]:
         config = config or self.client_config
         return Client.parse_url(uri, self.cache, **config)
@@ -699,16 +664,12 @@ class Catalog:
         source_metastore = self.metastore.clone(client.uri)

         columns = [
-            Column("vtype", String),
-            Column("dir_type", Int),
             Column("path", String),
             Column("etag", String),
             Column("version", String),
             Column("is_latest", Boolean),
             Column("last_modified", DateTime(timezone=True)),
             Column("size", Int64),
-            Column("owner_name", String),
-            Column("owner_id", String),
             Column("location", JSON),
             Column("source", String),
         ]
@@ -1549,7 +1510,6 @@ class Catalog:
             row["etag"],
             row["version"],
             row["is_latest"],
-            row["vtype"],
             row["location"],
             row["last_modified"],
         )
@@ -1805,14 +1765,15 @@ class Catalog:
     def query(
         self,
         query_script: str,
-        envs: Optional[Mapping[str, str]] = None,
-        python_executable: Optional[str] = None,
+        env: Optional[Mapping[str, str]] = None,
+        python_executable: str = sys.executable,
         save: bool = False,
         capture_output: bool = True,
         output_hook: Callable[[str], None] = noop,
         params: Optional[dict[str, str]] = None,
         job_id: Optional[str] = None,
-    ) -> QueryResult:
+        _execute_last_expression: bool = False,
+    ) -> None:
         """
         Method to run custom user Python script to run a query and, as result,
         creates new dataset from the results of a query.
@@ -1835,92 +1796,21 @@ class Catalog:
                 C.size > 1000
             )
         """
-        if not job_id:
-            python_version = f"{sys.version_info.major}.{sys.version_info.minor}"
-            job_id = self.metastore.create_job(
-                name="",
-                query=query_script,
-                params=params,
-                python_version=python_version,
-            )
-
-        lines, proc = self.run_query(
-            python_executable or sys.executable,
-            query_script,
-            envs,
-            capture_output,
-            output_hook,
-            params,
-            save,
-            job_id,
-        )
-        output = "".join(lines)
-
-        if proc.returncode:
-            if proc.returncode == QUERY_SCRIPT_CANCELED_EXIT_CODE:
-                raise QueryScriptCancelError(
-                    "Query script was canceled by user",
-                    return_code=proc.returncode,
-                    output=output,
-                )
-            raise QueryScriptRunError(
-                f"Query script exited with error code {proc.returncode}",
-                return_code=proc.returncode,
-                output=output,
-            )
-
-        def _get_dataset_versions_by_job_id():
-            for dr, dv, job in self.list_datasets_versions():
-                if job and str(job.id) == job_id:
-                    yield dr, dv
-
-        try:
-            dr, dv = max(
-                _get_dataset_versions_by_job_id(), key=lambda x: x[1].created_at
-            )
-        except ValueError as e:
-            if not save:
-                return QueryResult(dataset=None, version=None, output=output)
-
-            raise QueryScriptDatasetNotFound(
-                "No dataset found after running Query script",
-                output=output,
-            ) from e
-
-        dr = self.update_dataset(
-            dr,
-            script_output=output,
-            query_script=query_script,
-        )
-        self.update_dataset_version_with_warehouse_info(
-            dr,
-            dv.version,
-            script_output=output,
-            query_script=query_script,
-            job_id=job_id,
-            is_job_result=True,
-        )
-        return QueryResult(dataset=dr, version=dv.version, output=output)
+        if _execute_last_expression:
+            try:
+                code_ast = ast.parse(query_script)
+                code_ast = self.attach_query_wrapper(code_ast)
+                query_script_compiled = ast.unparse(code_ast)
+            except Exception as exc:
+                raise QueryScriptCompileError(
+                    f"Query script failed to compile, reason: {exc}"
+                ) from exc
+        else:
+            query_script_compiled = query_script
+            assert not save

-    def run_query(
-        self,
-        python_executable: str,
-        query_script: str,
-        envs: Optional[Mapping[str, str]],
-        capture_output: bool,
-        output_hook: Callable[[str], None],
-        params: Optional[dict[str, str]],
-        save: bool,
-        job_id: Optional[str],
-    ) -> tuple[list[str], subprocess.Popen]:
-        try:
-            query_script_compiled = self.compile_query_script(query_script)
-        except Exception as exc:
-            raise QueryScriptCompileError(
-                f"Query script failed to compile, reason: {exc}"
-            ) from exc
-        envs = dict(envs or os.environ)
-        envs.update(
+        env = dict(env or os.environ)
+        env.update(
             {
                 "DATACHAIN_QUERY_PARAMS": json.dumps(params or {}),
                 "PYTHONPATH": os.getcwd(),  # For local imports
@@ -1929,19 +1819,28 @@ class Catalog:
                 "DATACHAIN_JOB_ID": job_id or "",
             },
         )
-        with subprocess.Popen(  # noqa: S603
-            [python_executable, "-c", query_script_compiled],
-            env=envs,
-            stdout=subprocess.PIPE if capture_output else None,
-            stderr=subprocess.STDOUT if capture_output else None,
-            bufsize=1,
-            text=False,
-        ) as proc:
-            out = proc.stdout
-            _lines: list[str] = []
-            ctx = print_and_capture(out, output_hook) if out else nullcontext(_lines)
-            with ctx as lines:
-                return lines, proc
+        popen_kwargs = {}
+        if capture_output:
+            popen_kwargs = {"stdout": subprocess.PIPE, "stderr": subprocess.STDOUT}
+
+        cmd = [python_executable, "-c", query_script_compiled]
+        with subprocess.Popen(cmd, env=env, **popen_kwargs) as proc:  # type: ignore[call-overload] # noqa: S603
+            if capture_output:
+                args = (proc.stdout, output_hook)
+                thread = Thread(target=_process_stream, args=args, daemon=True)
+                thread.start()
+                thread.join()  # wait for the reader thread
+
+            if proc.returncode == QUERY_SCRIPT_CANCELED_EXIT_CODE:
+                raise QueryScriptCancelError(
+                    "Query script was canceled by user",
+                    return_code=proc.returncode,
+                )
+            if proc.returncode:
+                raise QueryScriptRunError(
+                    f"Query script exited with error code {proc.returncode}",
+                    return_code=proc.returncode,
+                )

     def cp(
         self,
@@ -2081,8 +1980,6 @@ class Catalog:
             field_set.add("path")
         elif column == "name":
             field_set.add("path")
-        elif column == "owner":
-            field_set.add("owner_name")
         elif column == "path":
             field_set.add("dir_type")
             field_set.add("path")
datachain/cli.py CHANGED
@@ -24,7 +24,7 @@ logger = logging.getLogger("datachain")

 TTL_HUMAN = "4h"
 TTL_INT = 4 * 60 * 60
-FIND_COLUMNS = ["du", "name", "owner", "path", "size", "type"]
+FIND_COLUMNS = ["du", "name", "path", "size", "type"]


 def human_time_type(value_str: str, can_be_none: bool = False) -> Optional[int]:
@@ -579,9 +579,8 @@ def _node_data_to_ls_values(row, long_format=False):
     value = name + ending
     if long_format:
         last_modified = row[2]
-        owner_name = row[3]
         timestamp = last_modified if not is_dir else None
-        return long_line_str(value, timestamp, owner_name)
+        return long_line_str(value, timestamp)
     return value


@@ -599,7 +598,7 @@ def _ls_urls_flat(
     if client_cls.is_root_url(source):
         buckets = client_cls.ls_buckets(**catalog.client_config)
         if long:
-            values = (long_line_str(b.name, b.created, "") for b in buckets)
+            values = (long_line_str(b.name, b.created) for b in buckets)
         else:
            values = (b.name for b in buckets)
         yield source, values
@@ -607,7 +606,7 @@ def _ls_urls_flat(
         found = False
         fields = ["name", "dir_type"]
         if long:
-            fields.extend(["last_modified", "owner_name"])
+            fields.append("last_modified")
         for data_source, results in catalog.ls([source], fields=fields, **kwargs):
             values = (_node_data_to_ls_values(r, long) for r in results)
             found = True
@@ -683,7 +682,6 @@ def ls_remote(
             entry = long_line_str(
                 row["name"] + ("/" if row["dir_type"] else ""),
                 row["last_modified"],
-                row["owner_name"],
             )
             print(format_ls_entry(entry))
         else:
datachain/client/fsspec.py CHANGED
@@ -363,7 +363,6 @@ class Client(ABC):
             parent["path"],
             parent["size"],
             parent["etag"],
-            vtype=parent["vtype"],
             location=parent["location"],
         )
         f = self.open_object(parent_uid, use_cache=use_cache)
datachain/client/s3.py CHANGED
@@ -119,8 +119,6 @@ class ClientS3(Client):
             is_latest=v.get("IsLatest", True),
             last_modified=v.get("LastModified", ""),
             size=v["Size"],
-            owner_name=v.get("Owner", {}).get("DisplayName", ""),
-            owner_id=v.get("Owner", {}).get("ID", ""),
         )

     async def _fetch_dir(
@@ -165,8 +163,6 @@ class ClientS3(Client):
             is_latest=v.get("IsLatest", True),
             last_modified=v.get("LastModified", ""),
             size=v["size"],
-            owner_name=v.get("Owner", {}).get("DisplayName", ""),
-            owner_id=v.get("Owner", {}).get("ID", ""),
         )

     def info_to_file(self, v: dict[str, Any], path: str) -> File:
datachain/data_storage/schema.py CHANGED
@@ -10,9 +10,8 @@ from typing import (

 import sqlalchemy as sa
 from sqlalchemy.sql import func as f
-from sqlalchemy.sql.expression import null, true
+from sqlalchemy.sql.expression import false, null, true

-from datachain.node import DirType
 from datachain.sql.functions import path
 from datachain.sql.types import Int, SQLType, UInt64

@@ -81,8 +80,7 @@ class DirExpansion:
     def base_select(q):
         return sa.select(
             q.c.sys__id,
-            q.c.vtype,
-            (q.c.dir_type == DirType.DIR).label("is_dir"),
+            false().label("is_dir"),
             q.c.source,
             q.c.path,
             q.c.version,
@@ -94,7 +92,6 @@ class DirExpansion:
         return (
             sa.select(
                 f.min(q.c.sys__id).label("sys__id"),
-                q.c.vtype,
                 q.c.is_dir,
                 q.c.source,
                 q.c.path,
@@ -102,8 +99,8 @@ class DirExpansion:
                 f.max(q.c.location).label("location"),
             )
             .select_from(q)
-            .group_by(q.c.source, q.c.path, q.c.vtype, q.c.is_dir, q.c.version)
-            .order_by(q.c.source, q.c.path, q.c.vtype, q.c.is_dir, q.c.version)
+            .group_by(q.c.source, q.c.path, q.c.is_dir, q.c.version)
+            .order_by(q.c.source, q.c.path, q.c.is_dir, q.c.version)
         )

     @classmethod
@@ -113,7 +110,6 @@ class DirExpansion:
         q = q.union_all(
             sa.select(
                 sa.literal(-1).label("sys__id"),
-                sa.literal("").label("vtype"),
                 true().label("is_dir"),
                 q.c.source,
                 parent.label("path"),
datachain/data_storage/warehouse.py CHANGED
@@ -28,7 +28,6 @@ from datachain.utils import sql_escape_like

 if TYPE_CHECKING:
     from sqlalchemy.sql._typing import _ColumnsClauseArgument
-    from sqlalchemy.sql.elements import ColumnElement
     from sqlalchemy.sql.selectable import Select
     from sqlalchemy.types import TypeEngine

@@ -341,9 +340,7 @@ class AbstractWarehouse(ABC, Serializable):

         column_objects = [dr.c[c] for c in column_names]
         # include all object types - file, tar archive, tar file (subobject)
-        select_query = dr.select(*column_objects).where(
-            dr.c.dir_type.in_(DirTypeGroup.FILE) & (dr.c.is_latest == true())
-        )
+        select_query = dr.select(*column_objects).where(dr.c.is_latest == true())
         if path is None:
             return select_query
         if recursive:
@@ -420,7 +417,6 @@ class AbstractWarehouse(ABC, Serializable):
         """

         def _prepare_entry(entry: Entry):
-            assert entry.dir_type is not None
             return attrs.asdict(entry) | {"source": uri}

         return [_prepare_entry(e) for e in entries]
@@ -440,7 +436,7 @@ class AbstractWarehouse(ABC, Serializable):
         """Inserts dataset rows directly into dataset table"""

     @abstractmethod
-    def instr(self, source, target) -> "ColumnElement":
+    def instr(self, source, target) -> sa.ColumnElement:
         """
         Return SQLAlchemy Boolean determining if a target substring is present in
         source string column
@@ -500,7 +496,7 @@ class AbstractWarehouse(ABC, Serializable):
         c = query.selected_columns
         q = query.where(c.dir_type.in_(file_group))
         if not include_subobjects:
-            q = q.where(c.vtype == "")
+            q = q.where((c.location == "") | (c.location.is_(None)))
         return q

     def get_nodes(self, query) -> Iterator[Node]:
@@ -624,8 +620,7 @@ class AbstractWarehouse(ABC, Serializable):

         return sa.select(
             de.c.sys__id,
-            with_default(dr.c.vtype),
-            case((de.c.is_dir == true(), DirType.DIR), else_=dr.c.dir_type).label(
+            case((de.c.is_dir == true(), DirType.DIR), else_=DirType.FILE).label(
                 "dir_type"
             ),
             de.c.path,
@@ -634,8 +629,6 @@ class AbstractWarehouse(ABC, Serializable):
             with_default(dr.c.is_latest),
             dr.c.last_modified,
             with_default(dr.c.size),
-            with_default(dr.c.owner_name),
-            with_default(dr.c.owner_id),
             with_default(dr.c.sys__rand),
             dr.c.location,
             de.c.source,
@@ -650,7 +643,6 @@ class AbstractWarehouse(ABC, Serializable):
         query = dr.select().where(
             self.path_expr(dr) == path,
             dr.c.is_latest == true(),
-            dr.c.dir_type != DirType.DIR,
         )
         row = next(self.db.execute(query), None)
         if row is not None:
@@ -660,7 +652,6 @@ class AbstractWarehouse(ABC, Serializable):
             dr.select()
             .where(
                 dr.c.is_latest == true(),
-                dr.c.dir_type != DirType.DIR,
                 dr.c.path.startswith(path),
             )
             .exists()
@@ -761,13 +752,11 @@ class AbstractWarehouse(ABC, Serializable):

         sub_glob = posixpath.join(path, "*")
         dr = dataset_rows
-        selections = [
+        selections: list[sa.ColumnElement] = [
             func.sum(dr.c.size),
         ]
         if count_files:
-            selections.append(
-                func.sum(dr.c.dir_type.in_(DirTypeGroup.FILE)),
-            )
+            selections.append(func.count())
         results = next(
             self.db.execute(
                 dr.select(*selections).where(
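
Note: with the `vtype` column gone, subobjects (for example files unpacked from a tar archive) are now recognized by a non-empty `location`, so the `include_subobjects=False` filter changes from `c.vtype == ""` to an empty-or-NULL check on `location`. A small SQLAlchemy Core sketch of that predicate; the table and columns below are illustrative, not datachain's real schema:

    import sqlalchemy as sa

    rows = sa.table("rows", sa.column("path"), sa.column("location"))

    # Keep only top-level objects: location is empty or NULL, mirroring the
    # new filter in AbstractWarehouse.
    top_level_only = sa.select(rows).where(
        (rows.c.location == "") | (rows.c.location.is_(None))
    )
    print(top_level_only)  # shows the compiled SELECT ... WHERE clause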
datachain/error.py CHANGED
@@ -42,10 +42,6 @@ class QueryScriptRunError(Exception):
         super().__init__(self.message)


-class QueryScriptDatasetNotFound(QueryScriptRunError):  # noqa: N818
-    pass
-
-
 class QueryScriptCancelError(QueryScriptRunError):
     pass

datachain/lib/clip.py CHANGED
@@ -18,7 +18,7 @@ def _get_encoder(model: Any, type: Literal["image", "text"]) -> Callable:
         hasattr(model, method_name) and inspect.ismethod(getattr(model, method_name))
     ):
         method = getattr(model, method_name)
-        return lambda x: method(torch.tensor(x))
+        return lambda x: method(torch.as_tensor(x).clone().detach())

     # Check for model from clip or open_clip library
     method_name = f"encode_{type}"
datachain/lib/dc.py CHANGED
@@ -234,7 +234,6 @@ class DataChain(DatasetQuery):
     DEFAULT_FILE_RECORD: ClassVar[dict] = {
         "source": "",
         "path": "",
-        "vtype": "",
         "size": 0,
     }

@@ -415,7 +414,7 @@ class DataChain(DatasetQuery):
             .save(list_dataset_name, listing=True)
         )

-        dc = cls.from_dataset(list_dataset_name, session=session)
+        dc = cls.from_dataset(list_dataset_name, session=session, settings=settings)
         dc.signals_schema = dc.signals_schema.mutate({f"{object_name}": file_type})

         return ls(dc, list_path, recursive=recursive, object_name=object_name)
@@ -426,6 +425,7 @@ class DataChain(DatasetQuery):
         name: str,
         version: Optional[int] = None,
         session: Optional[Session] = None,
+        settings: Optional[dict] = None,
     ) -> "DataChain":
         """Get data from a saved Dataset. It returns the chain itself.

@@ -438,7 +438,7 @@ class DataChain(DatasetQuery):
            chain = DataChain.from_dataset("my_cats")
            ```
        """
-        return DataChain(name=name, version=version, session=session)
+        return DataChain(name=name, version=version, session=session, settings=settings)

     @classmethod
     def from_json(
@@ -1622,6 +1622,8 @@ class DataChain(DatasetQuery):
         model_name: str = "",
         source: bool = True,
         nrows=None,
+        session: Optional[Session] = None,
+        settings: Optional[dict] = None,
         **kwargs,
     ) -> "DataChain":
         """Generate chain from csv files.
@@ -1638,6 +1640,8 @@ class DataChain(DatasetQuery):
            model_name : Generated model name.
            source : Whether to include info about the source file.
            nrows : Optional row limit.
+           session : Session to use for the chain.
+           settings : Settings to use for the chain.

        Example:
            Reading a csv file:
@@ -1654,7 +1658,9 @@ class DataChain(DatasetQuery):
         from pyarrow.csv import ConvertOptions, ParseOptions, ReadOptions
         from pyarrow.dataset import CsvFileFormat

-        chain = DataChain.from_storage(path, **kwargs)
+        chain = DataChain.from_storage(
+            path, session=session, settings=settings, **kwargs
+        )

         column_names = None
         if not header:
@@ -1701,6 +1707,8 @@ class DataChain(DatasetQuery):
         object_name: str = "",
         model_name: str = "",
         source: bool = True,
+        session: Optional[Session] = None,
+        settings: Optional[dict] = None,
         **kwargs,
     ) -> "DataChain":
         """Generate chain from parquet files.
@@ -1713,6 +1721,8 @@ class DataChain(DatasetQuery):
            object_name : Created object column name.
            model_name : Generated model name.
            source : Whether to include info about the source file.
+           session : Session to use for the chain.
+           settings : Settings to use for the chain.

        Example:
            Reading a single file:
@@ -1725,7 +1735,9 @@ class DataChain(DatasetQuery):
            dc = DataChain.from_parquet("s3://mybucket/dir")
            ```
        """
-        chain = DataChain.from_storage(path, **kwargs)
+        chain = DataChain.from_storage(
+            path, session=session, settings=settings, **kwargs
+        )
         return chain.parse_tabular(
             output=output,
             object_name=object_name,
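
Note: `from_csv` and `from_parquet` now take explicit `session` and `settings` arguments and forward them to the underlying `from_storage` call, and `from_dataset` gained `settings` as well. A hedged usage sketch; the bucket path is a placeholder and the settings key is assumed to match what `DataChain.settings()` accepts:

    from datachain.lib.dc import DataChain

    chain = DataChain.from_parquet(
        "s3://mybucket/dir",          # placeholder location
        settings={"cache": True},     # assumed settings key, now forwarded to from_storage
    )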
datachain/lib/file.py CHANGED
@@ -118,7 +118,6 @@ class File(DataModel):
     is_latest: bool = Field(default=True)
     last_modified: datetime = Field(default=TIME_ZERO)
     location: Optional[Union[dict, list[dict]]] = Field(default=None)
-    vtype: str = Field(default="")

     _datachain_column_types: ClassVar[dict[str, Any]] = {
         "source": String,
@@ -129,7 +128,6 @@ class File(DataModel):
         "is_latest": Boolean,
         "last_modified": DateTime,
         "location": JSON,
-        "vtype": String,
     }

     _unique_id_keys: ClassVar[list[str]] = [
@@ -139,7 +137,6 @@ class File(DataModel):
         "etag",
         "version",
         "is_latest",
-        "vtype",
         "location",
         "last_modified",
     ]
@@ -195,14 +192,15 @@ class File(DataModel):
            with VFileRegistry.resolve(self, self.location) as f:  # type: ignore[arg-type]
                yield f

-        uid = self.get_uid()
-        client = self._catalog.get_client(self.source)
-        if self._caching_enabled:
-            client.download(uid, callback=self._download_cb)
-        with client.open_object(
-            uid, use_cache=self._caching_enabled, cb=self._download_cb
-        ) as f:
-            yield io.TextIOWrapper(f) if mode == "r" else f
+        else:
+            uid = self.get_uid()
+            client = self._catalog.get_client(self.source)
+            if self._caching_enabled:
+                client.download(uid, callback=self._download_cb)
+            with client.open_object(
+                uid, use_cache=self._caching_enabled, cb=self._download_cb
+            ) as f:
+                yield io.TextIOWrapper(f) if mode == "r" else f

     def read(self, length: int = -1):
         """Returns file contents."""
datachain/lib/image.py CHANGED
@@ -34,7 +34,7 @@ def convert_image(
         from transformers.image_processing_utils import BaseImageProcessor

         if isinstance(transform, BaseImageProcessor):
-            img = torch.tensor(img.pixel_values[0])  # type: ignore[assignment,attr-defined]
+            img = torch.as_tensor(img.pixel_values[0]).clone().detach()  # type: ignore[assignment,attr-defined]
     except ImportError:
         pass
     if device:
datachain/lib/meta_formats.py CHANGED
@@ -1,13 +1,10 @@
-# pip install datamodel-code-generator
-# pip install jmespath
-#
 import csv
 import json
 import tempfile
 import uuid
 from collections.abc import Iterator
 from pathlib import Path
-from typing import Any, Callable
+from typing import Callable

 import datamodel_code_generator
 import jmespath as jsp
@@ -85,7 +82,6 @@ def read_schema(source_file, data_type="csv", expr=None, model_name=None):
         use_standard_collections=True,
     )
     epilogue = f"""
-{model_name}.model_rebuild()
 DataModel.register({model_name})
 spec = {model_name}
 """
@@ -122,9 +118,9 @@ def read_meta(  # noqa: C901
         print(f"{model_output}")
     # Below 'spec' should be a dynamically converted DataModel from Pydantic
     if not spec:
-        local_vars: dict[str, Any] = {}
-        exec(model_output, globals(), local_vars)  # type: ignore[arg-type] # noqa: S102
-        spec = local_vars["spec"]
+        gl = globals()
+        exec(model_output, gl)  # type: ignore[arg-type] # noqa: S102
+        spec = gl["spec"]

     if not (spec) and not (schema_from):
         raise ValueError(
datachain/lib/model_store.py CHANGED
@@ -1,3 +1,4 @@
+import inspect
 import logging
 from typing import ClassVar, Optional

@@ -69,7 +70,11 @@ class ModelStore:

     @staticmethod
     def is_pydantic(val):
-        return not hasattr(val, "__origin__") and issubclass(val, BaseModel)
+        return (
+            not hasattr(val, "__origin__")
+            and inspect.isclass(val)
+            and issubclass(val, BaseModel)
+        )

     @staticmethod
     def to_pydantic(val) -> Optional[type[BaseModel]]:
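
Note: the `inspect.isclass` guard protects `issubclass`, which raises `TypeError` when its first argument is not a class; with it, `is_pydantic` can be called on arbitrary values (instances, ints, typing constructs) and simply return False. A small sketch of the same check:

    import inspect
    from pydantic import BaseModel

    def is_pydantic(val):
        # only call issubclass() on real classes
        return (
            not hasattr(val, "__origin__")
            and inspect.isclass(val)
            and issubclass(val, BaseModel)
        )

    class Pet(BaseModel):
        name: str = ""

    print(is_pydantic(Pet))    # True
    print(is_pydantic(Pet()))  # False - an instance, not a class
    print(is_pydantic(42))     # False - previously issubclass(42, BaseModel) raised TypeError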
datachain/lib/text.py CHANGED
@@ -33,7 +33,7 @@ def convert_text(
     res = tokenizer(text)

     tokens = res.input_ids if isinstance(tokenizer, PreTrainedTokenizerBase) else res
-    tokens = torch.tensor(tokens)
+    tokens = torch.as_tensor(tokens).clone().detach()
     if device:
         tokens = tokens.to(device)

datachain/lib/webdataset.py CHANGED
@@ -1,6 +1,7 @@
 import hashlib
 import json
 import tarfile
+import warnings
 from collections.abc import Iterator, Sequence
 from pathlib import Path
 from typing import (
@@ -19,6 +20,18 @@ from datachain.lib.data_model import DataModel
 from datachain.lib.file import File, TarVFile
 from datachain.lib.utils import DataChainError

+# The `json` method of the Pydantic `BaseModel` class has been deprecated
+# and will be removed in Pydantic v3. For more details, see:
+# https://github.com/pydantic/pydantic/issues/10033
+# Until then, we can ignore the warning.
+warnings.filterwarnings(
+    "ignore",
+    category=UserWarning,
+    message=(
+        'Field name "json" in "WDSAllFile" shadows an attribute in parent "WDSBasic"'
+    ),
+)
+

 class WDSError(DataChainError):
     def __init__(self, tar_stream, message: str):
datachain/lib/webdataset_laion.py CHANGED
@@ -1,3 +1,4 @@
+import warnings
 from collections.abc import Iterator
 from typing import Optional

@@ -7,6 +8,18 @@ from pydantic import BaseModel, Field
 from datachain.lib.file import File
 from datachain.lib.webdataset import WDSBasic, WDSReadableSubclass

+# The `json` method of the Pydantic `BaseModel` class has been deprecated
+# and will be removed in Pydantic v3. For more details, see:
+# https://github.com/pydantic/pydantic/issues/10033
+# Until then, we can ignore the warning.
+warnings.filterwarnings(
+    "ignore",
+    category=UserWarning,
+    message=(
+        'Field name "json" in "WDSLaion" shadows an attribute in parent "WDSBasic"'
+    ),
+)
+

 class Laion(WDSReadableSubclass):
     uid: str = Field(default="")
datachain/listing.py CHANGED
@@ -104,7 +104,7 @@ class Listing:
         return self.warehouse.get_node_by_path(self.dataset_rows, path)

     def ls_path(self, node, fields):
-        if node.vtype == "tar" or node.dir_type == DirType.TAR_ARCHIVE:
+        if node.location or node.dir_type == DirType.TAR_ARCHIVE:
             return self.warehouse.select_node_fields_by_parent_path_tar(
                 self.dataset_rows, node.path, fields
             )
@@ -235,7 +235,7 @@ class Listing:
         return self.warehouse.size(self.dataset_rows, node, count_files)

     def subtree_files(self, node: Node, sort=None):
-        if node.dir_type == DirType.TAR_ARCHIVE or node.vtype != "":
+        if node.dir_type == DirType.TAR_ARCHIVE or node.location:
             include_subobjects = True
         else:
             include_subobjects = False
datachain/node.py CHANGED
@@ -49,18 +49,15 @@ class DirTypeGroup:
 class Node:
     sys__id: int = 0
     sys__rand: int = 0
-    vtype: str = ""
-    dir_type: Optional[int] = None
     path: str = ""
     etag: str = ""
     version: Optional[str] = None
     is_latest: bool = True
     last_modified: Optional[datetime] = None
     size: int = 0
-    owner_name: str = ""
-    owner_id: str = ""
     location: Optional[str] = None
     source: StorageURI = StorageURI("")
+    dir_type: int = DirType.FILE

     @property
     def is_dir(self) -> bool:
@@ -113,7 +110,6 @@ class Node:
             version=self.version or "",
             etag=self.etag,
             is_latest=self.is_latest,
-            vtype=self.vtype,
             location=self.location,
             last_modified=self.last_modified or TIME_ZERO,
         )
@@ -145,38 +141,20 @@ class Node:

 @attrs.define
 class Entry:
-    vtype: str = ""
-    dir_type: Optional[int] = None
     path: str = ""
     etag: str = ""
     version: str = ""
     is_latest: bool = True
     last_modified: Optional[datetime] = None
     size: int = 0
-    owner_name: str = ""
-    owner_id: str = ""
     location: Optional[str] = None

-    @property
-    def is_dir(self) -> bool:
-        return self.dir_type == DirType.DIR
-
-    @classmethod
-    def from_dir(cls, path: str, **kwargs) -> "Entry":
-        return cls(dir_type=DirType.DIR, path=path, **kwargs)
-
     @classmethod
     def from_file(cls, path: str, **kwargs) -> "Entry":
-        return cls(dir_type=DirType.FILE, path=path, **kwargs)
-
-    @classmethod
-    def root(cls):
-        return cls(dir_type=DirType.DIR)
+        return cls(path=path, **kwargs)

     @property
     def full_path(self) -> str:
-        if self.is_dir and self.path:
-            return self.path + "/"
         return self.path

     @property
@@ -229,9 +207,9 @@ class NodeWithPath:
 TIME_FMT = "%Y-%m-%d %H:%M"


-def long_line_str(name: str, timestamp: Optional[datetime], owner: str) -> str:
+def long_line_str(name: str, timestamp: Optional[datetime]) -> str:
     if timestamp is None:
         time = "-"
     else:
         time = timestamp.strftime(TIME_FMT)
-    return f"{owner: <19} {time: <19} {name}"
+    return f"{time: <19} {name}"
datachain/query/builtins.py CHANGED
@@ -22,10 +22,6 @@ def load_tar(raw):
     C.source,
     C.path,
     C.size,
-    C.vtype,
-    C.dir_type,
-    C.owner_name,
-    C.owner_id,
     C.is_latest,
     C.last_modified,
     C.version,
@@ -38,10 +34,6 @@ def index_tar(
     source,
     parent_path,
     size,
-    vtype,
-    dir_type,
-    owner_name,
-    owner_id,
     is_latest,
     last_modified,
     version,
@@ -53,10 +45,6 @@ def index_tar(
         source=source,
         path=parent_path,
         size=size,
-        vtype=vtype,
-        dir_type=dir_type,
-        owner_name=owner_name,
-        owner_id=owner_id,
         is_latest=bool(is_latest),
         last_modified=last_modified,
         version=version,
@@ -70,7 +58,6 @@ def index_tar(
             source=source,
             path=full_path,
             size=info.size,
-            vtype="tar",
             location={
                 "vtype": "tar",
                 "offset": info.offset_data,
@@ -81,7 +68,6 @@ def index_tar(
                 "version": version,
                 "size": size,
                 "etag": etag,
-                "vtype": "",
                 "location": None,
             },
         },
datachain/query/schema.py CHANGED
@@ -9,7 +9,7 @@ import attrs
 import sqlalchemy as sa
 from fsspec.callbacks import DEFAULT_CALLBACK, Callback

-from datachain.sql.types import JSON, Boolean, DateTime, Int, Int64, SQLType, String
+from datachain.sql.types import JSON, Boolean, DateTime, Int64, SQLType, String

 if TYPE_CHECKING:
     from datachain.catalog import Catalog
@@ -222,10 +222,6 @@ class DatasetRow:
         "path": String,
         "size": Int64,
         "location": JSON,
-        "vtype": String,
-        "dir_type": Int,
-        "owner_name": String,
-        "owner_id": String,
         "is_latest": Boolean,
         "last_modified": DateTime,
         "version": String,
@@ -238,10 +234,6 @@ class DatasetRow:
         source: str = "",
         size: int = 0,
         location: Optional[dict[str, Any]] = None,
-        vtype: str = "",
-        dir_type: int = 0,
-        owner_name: str = "",
-        owner_id: str = "",
         is_latest: bool = True,
         last_modified: Optional[datetime] = None,
         version: str = "",
@@ -251,10 +243,7 @@ class DatasetRow:
         str,
         int,
         Optional[str],
-        str,
         int,
-        str,
-        str,
         bool,
         datetime,
         str,
@@ -271,10 +260,6 @@ class DatasetRow:
         path,
         size,
         location,
-        vtype,
-        dir_type,
-        owner_name,
-        owner_id,
         is_latest,
         last_modified,
         version,
datachain/utils.py CHANGED
@@ -340,11 +340,8 @@ def show_df(
             "etag",
             "is_latest",
             "last_modified",
-            "owner_id",
-            "owner_name",
             "size",
             "version",
-            "vtype",
         ],
         inplace=True,
         errors="ignore",
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: datachain
-Version: 0.3.11
+Version: 0.3.13
 Summary: Wrangle unstructured AI data at scale
 Author-email: Dmitry Petrov <support@dvc.org>
 License: Apache-2.0
@@ -80,7 +80,6 @@ Requires-Dist: pytest-cov >=4.1.0 ; extra == 'tests'
 Requires-Dist: pytest-mock >=3.12.0 ; extra == 'tests'
 Requires-Dist: pytest-servers[all] >=0.5.5 ; extra == 'tests'
 Requires-Dist: pytest-benchmark[histogram] ; extra == 'tests'
-Requires-Dist: pytest-asyncio >=0.23.2 ; extra == 'tests'
 Requires-Dist: pytest-xdist >=3.3.1 ; extra == 'tests'
 Requires-Dist: virtualenv ; extra == 'tests'
 Requires-Dist: dulwich ; extra == 'tests'
@@ -96,12 +95,14 @@ Requires-Dist: transformers >=4.36.0 ; extra == 'torch'
 Provides-Extra: vector
 Requires-Dist: usearch ; extra == 'vector'

-.. image:: docs/assets/datachain_logotype.svg
-   :height: 48
-   :alt: DataChain logo
+================
+|logo| DataChain
+================

 |PyPI| |Python Version| |Codecov| |Tests|

+.. |logo| image:: docs/assets/datachain.svg
+   :height: 24
 .. |PyPI| image:: https://img.shields.io/pypi/v/datachain.svg
    :target: https://pypi.org/project/datachain/
    :alt: PyPI
@@ -115,8 +116,6 @@ Requires-Dist: usearch ; extra == 'vector'
    :target: https://github.com/iterative/datachain/actions/workflows/tests.yml
    :alt: Tests

-----------------
-
 DataChain is a modern Pythonic data-frame library designed for artificial intelligence.
 It is made to organize your unstructured data into datasets and wrangle it at scale on
 your local machine. Datachain does not abstract or hide the AI models and API calls, but helps to integrate them into the postmodern data stack.
@@ -1,65 +1,65 @@
 datachain/__init__.py,sha256=GeyhE-5LgfJav2OKYGaieP2lBvf2Gm-ihj7thnK9zjI,800
 datachain/__main__.py,sha256=hG3Y4ARGEqe1AWwNMd259rBlqtphx1Wk39YbueQ0yV8,91
 datachain/asyn.py,sha256=biF8M8fQujtj5xs0VLi8S16eBtzG6kceWlO_NILbCsg,8197
-datachain/cache.py,sha256=wznC2pge6RhlPTaJfBVGjmBc6bxWCPThu4aTFMltvFU,4076
-datachain/cli.py,sha256=ECf_z5X8ILDJdUn2Cpb_z-ZjSRIzn7skiuMGfM-y9i0,30999
+datachain/cache.py,sha256=WP-ktH_bRn3w2g1JOOQ7rCPsZyR4OM6K1Kb7yZsSSns,4056
+datachain/cli.py,sha256=alMjnoBUBLvBSMBR51N09rA_aUEdHJwyxSRogF7VbbA,30891
 datachain/cli_utils.py,sha256=jrn9ejGXjybeO1ur3fjdSiAyCHZrX0qsLLbJzN9ErPM,2418
 datachain/config.py,sha256=PfC7W5yO6HFO6-iMB4YB-0RR88LPiGmD6sS_SfVbGso,1979
 datachain/dataset.py,sha256=EcYjhHg1dxxPbDwSuIxc-mDRDo3v_pYf79fMy4re1oA,14740
-datachain/error.py,sha256=GY9KYTmb7GHXn2gGHV9X-PBhgwLj3i7VpK7tGHtAoGM,1279
+datachain/error.py,sha256=OnZ8OaBtDdTZPy8XQiy29SAjqdQArQeorYbP5ju7ldc,1199
 datachain/job.py,sha256=Jt4sNutMHJReaGsj3r3scueN5aESLGfhimAa8pUP7Is,1271
-datachain/listing.py,sha256=keLkvPfumDA3gijeIiinH5yGWe71qCxgF5HqqP5AeH4,8299
-datachain/node.py,sha256=dcm_7dVurFHpI0EHV2K6SjpJyh-gN4PVWAB-20quk04,6382
+datachain/listing.py,sha256=LgL0lV10AzD1v52ajSaJKFnyiq4hNXwQiqaGySWGQsw,8290
+datachain/node.py,sha256=gacKxUPLgJ1ul6LJWz7nylYjUWPbyUY5cqaBFDOnO9E,5756
 datachain/nodes_fetcher.py,sha256=kca19yvu11JxoVY1t4_ydp1FmchiV88GnNicNBQ9NIA,831
 datachain/nodes_thread_pool.py,sha256=ZyzBvUImIPmi4WlKC2SW2msA0UhtembbTdcs2nx29A0,3191
 datachain/progress.py,sha256=7_8FtJs770ITK9sMq-Lt4k4k18QmYl4yIG_kCoWID3o,4559
 datachain/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datachain/storage.py,sha256=RiSJLYdHUjnrEWkLBKPcETHpAxld_B2WxLg711t0aZI,3733
-datachain/utils.py,sha256=HKUdVqreBTzcCULAYRw1sC6z33OaomVD1WoMSoFcPHg,13148
+datachain/utils.py,sha256=Z9-lPNvrrAh_VWpzVBJ7L5-Oy_Oo1V0ZW7G0MVDyPK4,13065
 datachain/catalog/__init__.py,sha256=g2iAAFx_gEIrqshXlhSEbrc8qDaEH11cjU40n3CHDz4,409
-datachain/catalog/catalog.py,sha256=NgS7_SlmpJdUSp1v8KdCuLTjFklmYvT_jOLdzTyyK5I,72313
+datachain/catalog/catalog.py,sha256=hhLciKHD0dVwniFzUsYORQ72WpnM40QYT0ydoyx1Kvw,69308
 datachain/catalog/datasource.py,sha256=D-VWIVDCM10A8sQavLhRXdYSCG7F4o4ifswEF80_NAQ,1412
 datachain/catalog/loader.py,sha256=-6VelNfXUdgUnwInVyA8g86Boxv2xqhTh9xNS-Zlwig,8242
 datachain/client/__init__.py,sha256=T4wiYL9KIM0ZZ_UqIyzV8_ufzYlewmizlV4iymHNluE,86
 datachain/client/azure.py,sha256=LXSahE0Z6r4dXqpBkKnq3J5fg7N7ymC1lSn-1SoILGc,2687
 datachain/client/fileslice.py,sha256=bT7TYco1Qe3bqoc8aUkUZcPdPofJDHlryL5BsTn9xsY,3021
-datachain/client/fsspec.py,sha256=LQb5tr-pP9umCFYo3nGJR_dZxUyiSN7IDE8jhp1TXco,13333
+datachain/client/fsspec.py,sha256=Hy3-4HRV-3MozOybqAnF-qL0EoMYFHynpTG_YZphjZE,13298
 datachain/client/gcs.py,sha256=P_E3mhzhXR9mJ_wc3AYZuczzwOJ0-D3J5qhJXeSU-xk,4518
 datachain/client/hf.py,sha256=R-F6Ks6aVM9wSNkIXOkOnZFwsJlfdRwJjymRa78RLjM,1246
 datachain/client/local.py,sha256=H8TNY8pi2kA8y9_f_1XLUjJF66f229qC_b2y4xGkzdU,5300
-datachain/client/s3.py,sha256=aQxfMH8G8bUjmHF1-6P90MSkXsU5DgOPEVlKWLu459I,6568
+datachain/client/s3.py,sha256=zs41EvYW1bS_pUxnkCnJILzUJpL2V1jvvVKSN4BKYcc,6326
 datachain/data_storage/__init__.py,sha256=cEOJpyu1JDZtfUupYucCDNFI6e5Wmp_Oyzq6rZv32Y8,398
 datachain/data_storage/db_engine.py,sha256=81Ol1of9TTTzD97ORajCnP366Xz2mEJt6C-kTUCaru4,3406
 datachain/data_storage/id_generator.py,sha256=lCEoU0BM37Ai2aRpSbwo5oQT0GqZnSpYwwvizathRMQ,4292
 datachain/data_storage/job.py,sha256=w-7spowjkOa1P5fUVtJou3OltT0L48P0RYWZ9rSJ9-s,383
 datachain/data_storage/metastore.py,sha256=cHN0xmbUvChyayHHZm3Vqxr87jFqojPSlGBqhTPStlE,54519
-datachain/data_storage/schema.py,sha256=JKpSEz8igpwZ9zkpRPYVXZxEpiXuLKEs2WNhH0KqM6U,8552
+datachain/data_storage/schema.py,sha256=AGbjyEir5UmRZXI3m0jChZogUh5wd8csj6-YlUWaAxQ,8383
 datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2krunWUum5o,927
 datachain/data_storage/sqlite.py,sha256=Z4B2KDL4C8Uio2aLMxaKv0t2MoOtCV3bSqWg4X9mTFg,28048
-datachain/data_storage/warehouse.py,sha256=f7ETnYIXx5KMcPfwg_4bh_00QJiMLIliwE_41vmRGUo,33037
+datachain/data_storage/warehouse.py,sha256=s5hhVUWrlEopE6eGOqzXHeNtRapK30G8gj0Vkt_HHFQ,32649
 datachain/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datachain/lib/arrow.py,sha256=dV17oGiknqEW55ogGK_9T0ycNFwd2z-EFOW0AQiR6TU,5840
-datachain/lib/clip.py,sha256=33RL11OIqfbwyhvBgiMGM8rDAnZx1IRmxk9dY89ls3Q,6130
+datachain/lib/clip.py,sha256=lm5CzVi4Cj1jVLEKvERKArb-egb9j1Ls-fwTItT6vlI,6150
 datachain/lib/data_model.py,sha256=gHIjlow84GMRDa78yLL1Ud-N18or21fnTyPEwsatpXY,2045
 datachain/lib/dataset_info.py,sha256=srPPhI2UHf6hFPBecyFEVw2SS5aPisIIMsvGgKqi7ss,2366
-datachain/lib/dc.py,sha256=s4E-bD6_T6JFJ7TEa5Y9RS705lIfcV9OUJwDD6RNCX0,68156
-datachain/lib/file.py,sha256=WOOYw3LcGROA6wshJ_aZkSgcTqfB4UxTbZDTx9KqAOg,11429
+datachain/lib/dc.py,sha256=C-sfWRinV8pDK2P6UHLbScOahTlTiVQpoxUUdVllF2k,68710
+datachain/lib/file.py,sha256=rXmyzUFgnLQ4J3CyOCcg-guhzAz4x9Ug595FbNn4Y2E,11398
 datachain/lib/hf.py,sha256=ZiMvgy3DYiklGKZv-w7gevrHOgn3bGfpTlpDPOHCNqs,5336
-datachain/lib/image.py,sha256=WbcwSaFzuyqjg4x4hH5CUogeUQjkZFjQHqw_oDEV1nA,2655
+datachain/lib/image.py,sha256=AMXYwQsmarZjRbPCZY3M1jDsM2WAB_b3cTY4uOIuXNU,2675
 datachain/lib/listing.py,sha256=S9Xn_Saxu4xk3K_01VexkfMZW0INQiATlidt2bzgWKY,3938
 datachain/lib/listing_info.py,sha256=sr5KzCXlCxlPuRmy_pVadD4miLpp5y0btvyaIPcluwI,996
-datachain/lib/meta_formats.py,sha256=67uF9trQ2II6xFvN0u6eo5NNRf5xvCkpMHj7ThiG41Y,6777
-datachain/lib/model_store.py,sha256=c4USXsBBjrGH8VOh4seIgOiav-qHOwdoixtxfLgU63c,2409
+datachain/lib/meta_formats.py,sha256=3f-0vpMTesagS9iMd3y9-u9r-7g0eqYsxmK4fVfNWlw,6635
+datachain/lib/model_store.py,sha256=xcrQ69-jcQs716U4UFOSoSKM7EvFIWqxlPhIcE4X7oI,2497
 datachain/lib/pytorch.py,sha256=vK3GbWCy7kunN7ubul6w1hrWmJLja56uTCiMG_7XVQA,5623
 datachain/lib/settings.py,sha256=39thOpYJw-zPirzeNO6pmRC2vPrQvt4eBsw1xLWDFsw,2344
 datachain/lib/signal_schema.py,sha256=hqQLwUmt3w8RLa96MtubK9N2CBXqqTPrUkSRXc0ktt4,20275
-datachain/lib/text.py,sha256=vqs1SQdsw1vCzfvOanIeT4xY2R2TmPonElBgYDVeZmY,1241
+datachain/lib/text.py,sha256=UNHm8fhidk7wdrWqacEWaA6I9ykfYqarQ2URby7jc7M,1261
 datachain/lib/udf.py,sha256=nG7DDuPgZ5ZuijwvDoCq-OZMxlDM8vFNzyxMmik0Y1c,11716
 datachain/lib/udf_signature.py,sha256=gMStcEeYJka5M6cg50Z9orC6y6HzCAJ3MkFqqn1fjZg,7137
 datachain/lib/utils.py,sha256=5-kJlAZE0D9nXXweAjo7-SP_AWGo28feaDByONYaooQ,463
 datachain/lib/vfile.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datachain/lib/webdataset.py,sha256=Q3UlCk66341sq-nvFbBCX4Cv3cYXBK9n12ejG4axPXE,8298
-datachain/lib/webdataset_laion.py,sha256=PQP6tQmUP7Xu9fPuAGK1JDBYA6T5UufYMUTGaxgspJA,2118
+datachain/lib/webdataset.py,sha256=ZzGLtOUA-QjP4kttGgNqhrioDuDnomWFlsow4fLdezQ,8717
+datachain/lib/webdataset_laion.py,sha256=aGMWeFmeYNK75ewO9JTA11iB1i3QtTzUfenQA5jajfo,2535
 datachain/lib/convert/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datachain/lib/convert/flatten.py,sha256=Uebc5CeqCsacp-nr6IG9i6OGuUavXqdqnoGctZBk3RQ,1384
 datachain/lib/convert/python_to_sql.py,sha256=40SAOdoOgikZRhn8iomCPDRoxC3RFxjJLivEAA9MHDU,2880
@@ -68,13 +68,13 @@ datachain/lib/convert/unflatten.py,sha256=Ogvh_5wg2f38_At_1lN0D_e2uZOOpYEvwvB2xd
 datachain/lib/convert/values_to_tuples.py,sha256=YOdbjzHq-uj6-cV2Qq43G72eN2avMNDGl4x5t6yQMl8,3931
 datachain/query/__init__.py,sha256=tv-spkjUCYamMN9ys_90scYrZ8kJ7C7d1MTYVmxGtk4,325
 datachain/query/batch.py,sha256=-vlpINJiertlnaoUVv1C95RatU0F6zuhpIYRufJRo1M,3660
-datachain/query/builtins.py,sha256=EmKPYsoQ46zwdyOn54MuCzvYFmfsBn5F8zyF7UBUfrc,2550
+datachain/query/builtins.py,sha256=U6yHPF9bzxqK5iwyqCqbJxo8ggBVx9FtuXxRrQQ0SNM,2244
 datachain/query/dataset.py,sha256=B2EmGOL8gjrdU_WhU88Dj7FsxvxrNeKwe2STXnU9T9E,58369
 datachain/query/dispatch.py,sha256=GBh3EZHDp5AaXxrjOpfrpfsuy7Umnqxu-MAXcK9X3gc,12945
 datachain/query/metrics.py,sha256=r5b0ygYhokbXp8Mg3kCH8iFSRw0jxzyeBe-C-J_bKFc,938
 datachain/query/params.py,sha256=O_j89mjYRLOwWNhYZl-z7mi-rkdP7WyFmaDufsdTryE,863
 datachain/query/queue.py,sha256=waqM_KzavU8C-G95-4211Nd4GXna_u2747Chgwtgz2w,3839
-datachain/query/schema.py,sha256=BvHipN79CnSTbVFcfIEwzo1npe7HmThnk0iY-CSLEkM,7899
+datachain/query/schema.py,sha256=ytlkA1xFAUOia25u8d6pxvxBSRl3uivLuOe2eHaw-qc,7550
 datachain/query/session.py,sha256=UPH5Z4fzCDsvj81ji0e8GA6Mgra3bOAEpVq4htqOtis,4317
 datachain/query/udf.py,sha256=j3NhmKK5rYG5TclcM2Sr0LhS1tmYLMjzMugx9G9iFLM,8100
 datachain/remote/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -96,9 +96,9 @@ datachain/sql/sqlite/base.py,sha256=WLPHBhZbXbiqPoRV1VgDrXJqku4UuvJpBhYeQ0k5rI8,
 datachain/sql/sqlite/types.py,sha256=yzvp0sXSEoEYXs6zaYC_2YubarQoZH-MiUNXcpuEP4s,1573
 datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR0,469
 datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
-datachain-0.3.11.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
-datachain-0.3.11.dist-info/METADATA,sha256=iSdfjWpVT1Iqzlg82eN5QzJ-icaYxkG7TUKEpEOi5sk,17124
-datachain-0.3.11.dist-info/WHEEL,sha256=cVxcB9AmuTcXqmwrtPhNK88dr7IR_b6qagTj0UvIEbY,91
-datachain-0.3.11.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
-datachain-0.3.11.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
-datachain-0.3.11.dist-info/RECORD,,
+datachain-0.3.13.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
+datachain-0.3.13.dist-info/METADATA,sha256=pzMOR9LYuLR26Wifk4GPS9Wi1mmqCC5CIBZyA-X5_oo,17073
+datachain-0.3.13.dist-info/WHEEL,sha256=cVxcB9AmuTcXqmwrtPhNK88dr7IR_b6qagTj0UvIEbY,91
+datachain-0.3.13.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
+datachain-0.3.13.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
+datachain-0.3.13.dist-info/RECORD,,