datachain 0.8.10__py3-none-any.whl → 0.8.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic.
- datachain/cache.py +4 -4
- datachain/catalog/__init__.py +0 -2
- datachain/catalog/catalog.py +102 -138
- datachain/cli/__init__.py +7 -6
- datachain/cli/parser/__init__.py +27 -16
- datachain/cli/parser/studio.py +7 -6
- datachain/cli/parser/utils.py +18 -0
- datachain/client/fsspec.py +11 -8
- datachain/client/local.py +4 -4
- datachain/data_storage/schema.py +1 -1
- datachain/dataset.py +1 -1
- datachain/error.py +12 -0
- datachain/func/__init__.py +2 -1
- datachain/func/conditional.py +67 -23
- datachain/func/func.py +17 -5
- datachain/lib/dc.py +24 -4
- datachain/lib/file.py +16 -0
- datachain/lib/listing.py +30 -12
- datachain/lib/pytorch.py +1 -1
- datachain/lib/udf.py +1 -1
- datachain/listing.py +1 -13
- datachain/node.py +0 -15
- datachain/nodes_fetcher.py +2 -2
- datachain/remote/studio.py +1 -1
- datachain/studio.py +1 -1
- {datachain-0.8.10.dist-info → datachain-0.8.11.dist-info}/METADATA +3 -7
- {datachain-0.8.10.dist-info → datachain-0.8.11.dist-info}/RECORD +31 -31
- {datachain-0.8.10.dist-info → datachain-0.8.11.dist-info}/LICENSE +0 -0
- {datachain-0.8.10.dist-info → datachain-0.8.11.dist-info}/WHEEL +0 -0
- {datachain-0.8.10.dist-info → datachain-0.8.11.dist-info}/entry_points.txt +0 -0
- {datachain-0.8.10.dist-info → datachain-0.8.11.dist-info}/top_level.txt +0 -0
datachain/client/fsspec.py
CHANGED
@@ -3,6 +3,7 @@ import functools
 import logging
 import multiprocessing
 import os
+import posixpath
 import re
 import sys
 from abc import ABC, abstractmethod
@@ -25,7 +26,7 @@ from fsspec.asyn import get_loop, sync
 from fsspec.callbacks import DEFAULT_CALLBACK, Callback
 from tqdm.auto import tqdm

-from datachain.cache import
+from datachain.cache import Cache
 from datachain.client.fileslice import FileWrapper
 from datachain.error import ClientError as DataChainClientError
 from datachain.nodes_fetcher import NodesFetcher
@@ -74,9 +75,7 @@ class Client(ABC):
     PREFIX: ClassVar[str]
     protocol: ClassVar[str]

-    def __init__(
-        self, name: str, fs_kwargs: dict[str, Any], cache: DataChainCache
-    ) -> None:
+    def __init__(self, name: str, fs_kwargs: dict[str, Any], cache: Cache) -> None:
         self.name = name
         self.fs_kwargs = fs_kwargs
         self._fs: Optional[AbstractFileSystem] = None
@@ -122,7 +121,7 @@ class Client(ABC):
         return cls.get_uri(storage_name), rel_path

     @staticmethod
-    def get_client(source: str, cache:
+    def get_client(source: str, cache: Cache, **kwargs) -> "Client":
         cls = Client.get_implementation(source)
         storage_url, _ = cls.split_url(source)
         if os.name == "nt":
@@ -145,7 +144,7 @@ class Client(ABC):
     def from_name(
         cls,
         name: str,
-        cache:
+        cache: Cache,
         kwargs: dict[str, Any],
     ) -> "Client":
         return cls(name, kwargs, cache)
@@ -154,7 +153,7 @@ class Client(ABC):
     def from_source(
         cls,
         uri: "StorageURI",
-        cache:
+        cache: Cache,
         **kwargs,
     ) -> "Client":
         return cls(cls.FS_CLASS._strip_protocol(uri), kwargs, cache)
@@ -390,8 +389,12 @@ class Client(ABC):
             self.fs.open(self.get_full_path(file.path, file.version)), cb
         )  # type: ignore[return-value]

-    def upload(self,
+    def upload(self, data: bytes, path: str) -> "File":
         full_path = self.get_full_path(path)
+
+        parent = posixpath.dirname(full_path)
+        self.fs.makedirs(parent, exist_ok=True)
+
         self.fs.pipe_file(full_path, data)
         file_info = self.fs.info(full_path)
         return self.info_to_file(file_info, path)
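The main behavioral change here is that `upload` now ensures the parent directory exists before writing. A minimal sketch, assuming `client` is an already-constructed `Client` instance and the target path is hypothetical:

    # In 0.8.11 upload() first runs fs.makedirs(parent, exist_ok=True) on the parent
    # of the full path, then writes the bytes with fs.pipe_file().
    file = client.upload(b"hello world", "nested/dir/example.txt")
    # the returned File object describes the object that was just written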
datachain/client/local.py
CHANGED
@@ -12,7 +12,7 @@ from datachain.lib.file import File
 from .fsspec import Client

 if TYPE_CHECKING:
-    from datachain.cache import
+    from datachain.cache import Cache
     from datachain.dataset import StorageURI


@@ -25,7 +25,7 @@ class FileClient(Client):
         self,
         name: str,
         fs_kwargs: dict[str, Any],
-        cache: "
+        cache: "Cache",
         use_symlinks: bool = False,
     ) -> None:
         super().__init__(name, fs_kwargs, cache)
@@ -82,7 +82,7 @@ class FileClient(Client):
         return bucket, path

     @classmethod
-    def from_name(cls, name: str, cache: "
+    def from_name(cls, name: str, cache: "Cache", kwargs) -> "FileClient":
         use_symlinks = kwargs.pop("use_symlinks", False)
         return cls(name, kwargs, cache, use_symlinks=use_symlinks)

@@ -90,7 +90,7 @@ class FileClient(Client):
     def from_source(
         cls,
         uri: str,
-        cache: "
+        cache: "Cache",
         use_symlinks: bool = False,
         **kwargs,
     ) -> "FileClient":
datachain/data_storage/schema.py
CHANGED
@@ -200,7 +200,7 @@ class DataTable:
         columns: Sequence["sa.Column"] = (),
         metadata: Optional["sa.MetaData"] = None,
     ):
-        # copy columns, since
+        # copy columns, since reusing the same objects from another table
         # may raise an error
         columns = cls.sys_columns() + [cls.copy_column(c) for c in columns]
         columns = dedup_columns(columns)
datachain/dataset.py
CHANGED
@@ -91,7 +91,7 @@ class DatasetDependency:
         if self.type == DatasetDependencyType.DATASET:
             return self.name

-        list_dataset_name, _, _ = parse_listing_uri(self.name.strip("/"),
+        list_dataset_name, _, _ = parse_listing_uri(self.name.strip("/"), {})
         assert list_dataset_name
         return list_dataset_name

datachain/error.py
CHANGED
@@ -1,3 +1,15 @@
+import botocore.errorfactory
+import botocore.exceptions
+import gcsfs.retry
+
+REMOTE_ERRORS = (
+    gcsfs.retry.HttpError,  # GCS
+    OSError,  # GCS
+    botocore.exceptions.BotoCoreError,  # S3
+    ValueError,  # Azure
+)
+
+
 class DataChainError(RuntimeError):
     pass

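`REMOTE_ERRORS` groups the provider-specific exception classes (GCS, S3, Azure) into one tuple so callers can catch them uniformly; the listing code further down in this diff uses it exactly this way. A small sketch of the pattern, with the `fs.info` call standing in for any remote filesystem operation:

    from datachain.error import REMOTE_ERRORS, ClientError

    try:
        info = client.fs.info(path)  # any fsspec call against remote storage
    except REMOTE_ERRORS as e:
        # normalize provider-specific failures into datachain's ClientError
        raise ClientError(message=str(e), error_code=getattr(e, "code", None)) from e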
datachain/func/__init__.py
CHANGED
@@ -16,7 +16,7 @@ from .aggregate import (
     sum,
 )
 from .array import cosine_distance, euclidean_distance, length, sip_hash_64
-from .conditional import case, greatest, ifelse, least
+from .conditional import case, greatest, ifelse, isnone, least
 from .numeric import bit_and, bit_hamming_distance, bit_or, bit_xor, int_hash_64
 from .random import rand
 from .string import byte_hamming_distance
@@ -42,6 +42,7 @@ __all__ = [
     "greatest",
     "ifelse",
     "int_hash_64",
+    "isnone",
     "least",
     "length",
     "literal",
|
datachain/func/conditional.py
CHANGED
|
@@ -1,14 +1,15 @@
|
|
|
1
|
-
from typing import Union
|
|
1
|
+
from typing import Optional, Union
|
|
2
2
|
|
|
3
|
+
from sqlalchemy import ColumnElement
|
|
3
4
|
from sqlalchemy import case as sql_case
|
|
4
|
-
from sqlalchemy.sql.elements import BinaryExpression
|
|
5
5
|
|
|
6
6
|
from datachain.lib.utils import DataChainParamsError
|
|
7
|
+
from datachain.query.schema import Column
|
|
7
8
|
from datachain.sql.functions import conditional
|
|
8
9
|
|
|
9
10
|
from .func import ColT, Func
|
|
10
11
|
|
|
11
|
-
CaseT = Union[int, float, complex, bool, str]
|
|
12
|
+
CaseT = Union[int, float, complex, bool, str, Func]
|
|
12
13
|
|
|
13
14
|
|
|
14
15
|
def greatest(*args: Union[ColT, float]) -> Func:
|
|
@@ -87,17 +88,21 @@ def least(*args: Union[ColT, float]) -> Func:
|
|
|
87
88
|
)
|
|
88
89
|
|
|
89
90
|
|
|
90
|
-
def case(
|
|
91
|
+
def case(
|
|
92
|
+
*args: tuple[Union[ColumnElement, Func], CaseT], else_: Optional[CaseT] = None
|
|
93
|
+
) -> Func:
|
|
91
94
|
"""
|
|
92
95
|
Returns the case function that produces case expression which has a list of
|
|
93
|
-
conditions and corresponding results. Results can
|
|
94
|
-
|
|
96
|
+
conditions and corresponding results. Results can be python primitives like string,
|
|
97
|
+
numbers or booleans but can also be other nested function (including case function).
|
|
98
|
+
Result type is inferred from condition results.
|
|
95
99
|
|
|
96
100
|
Args:
|
|
97
|
-
args (tuple(
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
+
args (tuple((ColumnElement, Func), (str | int | float | complex | bool, Func))):
|
|
102
|
+
Tuple of condition and values pair.
|
|
103
|
+
else_ (str | int | float | complex | bool, Func): optional else value in case
|
|
104
|
+
expression. If omitted, and no case conditions are satisfied, the result
|
|
105
|
+
will be None (NULL in DB).
|
|
101
106
|
|
|
102
107
|
Returns:
|
|
103
108
|
Func: A Func object that represents the case function.
|
|
@@ -111,15 +116,24 @@ def case(*args: tuple[BinaryExpression, CaseT], else_=None) -> Func:
|
|
|
111
116
|
"""
|
|
112
117
|
supported_types = [int, float, complex, str, bool]
|
|
113
118
|
|
|
114
|
-
|
|
119
|
+
def _get_type(val):
|
|
120
|
+
if isinstance(val, Func):
|
|
121
|
+
# nested functions
|
|
122
|
+
return val.result_type
|
|
123
|
+
return type(val)
|
|
115
124
|
|
|
116
125
|
if not args:
|
|
117
126
|
raise DataChainParamsError("Missing statements")
|
|
118
127
|
|
|
128
|
+
type_ = _get_type(else_) if else_ is not None else None
|
|
129
|
+
|
|
119
130
|
for arg in args:
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
131
|
+
arg_type = _get_type(arg[1])
|
|
132
|
+
if type_ and arg_type != type_:
|
|
133
|
+
raise DataChainParamsError(
|
|
134
|
+
f"Statement values must be of the same type, got {type_} and {arg_type}"
|
|
135
|
+
)
|
|
136
|
+
type_ = arg_type
|
|
123
137
|
|
|
124
138
|
if type_ not in supported_types:
|
|
125
139
|
raise DataChainParamsError(
|
|
@@ -127,20 +141,25 @@ def case(*args: tuple[BinaryExpression, CaseT], else_=None) -> Func:
|
|
|
127
141
|
)
|
|
128
142
|
|
|
129
143
|
kwargs = {"else_": else_}
|
|
130
|
-
|
|
144
|
+
|
|
145
|
+
return Func("case", inner=sql_case, cols=args, kwargs=kwargs, result_type=type_)
|
|
131
146
|
|
|
132
147
|
|
|
133
|
-
def ifelse(
|
|
148
|
+
def ifelse(
|
|
149
|
+
condition: Union[ColumnElement, Func], if_val: CaseT, else_val: CaseT
|
|
150
|
+
) -> Func:
|
|
134
151
|
"""
|
|
135
152
|
Returns the ifelse function that produces if expression which has a condition
|
|
136
|
-
and values for true and false outcome. Results can
|
|
137
|
-
like string,
|
|
153
|
+
and values for true and false outcome. Results can be one of python primitives
|
|
154
|
+
like string, numbers or booleans, but can also be nested functions.
|
|
155
|
+
Result type is inferred from the values.
|
|
138
156
|
|
|
139
157
|
Args:
|
|
140
|
-
condition
|
|
141
|
-
if_val
|
|
142
|
-
|
|
143
|
-
|
|
158
|
+
condition (ColumnElement, Func): Condition which is evaluated.
|
|
159
|
+
if_val (str | int | float | complex | bool, Func): Value for true
|
|
160
|
+
condition outcome.
|
|
161
|
+
else_val (str | int | float | complex | bool, Func): Value for false condition
|
|
162
|
+
outcome.
|
|
144
163
|
|
|
145
164
|
Returns:
|
|
146
165
|
Func: A Func object that represents the ifelse function.
|
|
@@ -148,8 +167,33 @@ def ifelse(condition: BinaryExpression, if_val: CaseT, else_val: CaseT) -> Func:
|
|
|
148
167
|
Example:
|
|
149
168
|
```py
|
|
150
169
|
dc.mutate(
|
|
151
|
-
res=func.ifelse(
|
|
170
|
+
res=func.ifelse(isnone("col"), "EMPTY", "NOT_EMPTY")
|
|
152
171
|
)
|
|
153
172
|
```
|
|
154
173
|
"""
|
|
155
174
|
return case((condition, if_val), else_=else_val)
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def isnone(col: Union[str, Column]) -> Func:
|
|
178
|
+
"""
|
|
179
|
+
Returns True if column value is None, otherwise False.
|
|
180
|
+
|
|
181
|
+
Args:
|
|
182
|
+
col (str | Column): Column to check if it's None or not.
|
|
183
|
+
If a string is provided, it is assumed to be the name of the column.
|
|
184
|
+
|
|
185
|
+
Returns:
|
|
186
|
+
Func: A Func object that represents the conditional to check if column is None.
|
|
187
|
+
|
|
188
|
+
Example:
|
|
189
|
+
```py
|
|
190
|
+
dc.mutate(test=ifelse(isnone("col"), "EMPTY", "NOT_EMPTY"))
|
|
191
|
+
```
|
|
192
|
+
"""
|
|
193
|
+
from datachain import C
|
|
194
|
+
|
|
195
|
+
if isinstance(col, str):
|
|
196
|
+
# if string, it is assumed to be the name of the column
|
|
197
|
+
col = C(col)
|
|
198
|
+
|
|
199
|
+
return case((col.is_(None) if col is not None else True, True), else_=False)
|
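The new `isnone` helper and the widened `case`/`ifelse` signatures are meant to be used through `mutate`. A short sketch based on the docstrings above, assuming `dc` is an existing DataChain and `col`/`size` are hypothetical columns in it:

    from datachain import C, func

    dc = dc.mutate(
        # boolean signal: True where "col" is NULL
        empty=func.isnone("col"),
        # string label derived from the same check (Func used as a condition)
        label=func.ifelse(func.isnone("col"), "EMPTY", "NOT_EMPTY"),
        # plain case expression with a literal fallback
        size_class=func.case((C("size") > 1000, "big"), else_="small"),
    )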
datachain/func/func.py
CHANGED
@@ -23,7 +23,7 @@ if TYPE_CHECKING:
     from .window import Window


-ColT = Union[str, ColumnElement, "Func"]
+ColT = Union[str, ColumnElement, "Func", tuple]


 class Func(Function):
@@ -78,7 +78,7 @@ class Func(Function):
         return (
             [
                 col
-                if isinstance(col, (Func, BindParameter, Case, Comparator))
+                if isinstance(col, (Func, BindParameter, Case, Comparator, tuple))
                 else ColumnMeta.to_db_name(
                     col.name if isinstance(col, ColumnElement) else col
                 )
@@ -381,17 +381,24 @@
         col_type = self.get_result_type(signals_schema)
         sql_type = python_to_sql(col_type)

-        def get_col(col: ColT) -> ColT:
+        def get_col(col: ColT, string_as_literal=False) -> ColT:
+            # string_as_literal is used only for conditionals like `case()` where
+            # literals are nested inside ColT as we have tuples of condition - values
+            # and if user wants to set some case value as column, explicit `C("col")`
+            # syntax must be used to distinguish from literals
+            if isinstance(col, tuple):
+                return tuple(get_col(x, string_as_literal=True) for x in col)
             if isinstance(col, Func):
                 return col.get_column(signals_schema, table=table)
-            if isinstance(col, str):
+            if isinstance(col, str) and not string_as_literal:
                 column = Column(col, sql_type)
                 column.table = table
                 return column
             return col

         cols = [get_col(col) for col in self._db_cols]
-
+        kwargs = {k: get_col(v, string_as_literal=True) for k, v in self.kwargs.items()}
+        func_col = self.inner(*cols, *self.args, **kwargs)

         if self.is_window:
             if not self.window:
@@ -416,6 +423,11 @@


 def get_db_col_type(signals_schema: "SignalSchema", col: ColT) -> "DataType":
+    if isinstance(col, tuple):
+        raise DataChainParamsError(
+            "Cannot get type from tuple, please provide type hint to the function"
+        )
+
     if isinstance(col, Func):
         return col.get_result_type(signals_schema)

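The `string_as_literal` flag changes how strings are interpreted inside `case()`/`ifelse()` tuples: on the value side they are treated as literal constants, and, per the inline comment, a column used as a value has to be spelled explicitly with the `C("col")` syntax. A brief sketch of the literal behavior, assuming `dc` has a hypothetical numeric `size` column:

    from datachain import C, func

    # "yes" and "no" are stored verbatim as string literals, not looked up as
    # column names, because string values inside conditional tuples are literals.
    dc = dc.mutate(is_large=func.ifelse(C("size") > 1000, "yes", "no"))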
datachain/lib/dc.py
CHANGED
@@ -25,6 +25,7 @@ from sqlalchemy.sql.functions import GenericFunction
 from sqlalchemy.sql.sqltypes import NullType

 from datachain.dataset import DatasetRecord
+from datachain.func import literal
 from datachain.func.base import Function
 from datachain.func.func import Func
 from datachain.lib.convert.python_to_sql import python_to_sql
@@ -1129,8 +1130,12 @@ class DataChain:
             )
             ```
         """
+        primitives = (bool, str, int, float)
+
         for col_name, expr in kwargs.items():
-            if not isinstance(expr, (Column, Func)) and isinstance(
+            if not isinstance(expr, (*primitives, Column, Func)) and isinstance(
+                expr.type, NullType
+            ):
                 raise DataChainColumnError(
                     col_name, f"Cannot infer type with expression {expr}"
                 )
@@ -1145,6 +1150,11 @@ class DataChain:
             elif isinstance(value, Func):
                 # adding new signal
                 mutated[name] = value.get_column(schema)
+            elif isinstance(value, primitives):
+                # adding simple python constant primitives like str, int, float, bool
+                val = literal(value)
+                val.type = python_to_sql(type(value))()
+                mutated[name] = val  # type: ignore[assignment]
             else:
                 # adding new signal
                 mutated[name] = value
@@ -1942,7 +1952,7 @@ class DataChain:
     def from_csv(
         cls,
         path,
-        delimiter: str =
+        delimiter: Optional[str] = None,
         header: bool = True,
         output: OutputType = None,
         object_name: str = "",
@@ -1952,6 +1962,7 @@ class DataChain:
         session: Optional[Session] = None,
         settings: Optional[dict] = None,
         column_types: Optional[dict[str, "Union[str, ArrowDataType]"]] = None,
+        parse_options: Optional[dict[str, "Union[str, Union[bool, Callable]]"]] = None,
         **kwargs,
     ) -> "DataChain":
         """Generate chain from csv files.
@@ -1959,7 +1970,8 @@ class DataChain:
         Parameters:
             path : Storage URI with directory. URI must start with storage prefix such
                 as `s3://`, `gs://`, `az://` or "file:///".
-            delimiter : Character for delimiting columns.
+            delimiter : Character for delimiting columns. Takes precedence if also
+                specified in `parse_options`. Defaults to ",".
             header : Whether the files include a header row.
             output : Dictionary or feature class defining column names and their
                 corresponding types. List of column names is also accepted, in which
@@ -1973,6 +1985,8 @@ class DataChain:
             column_types : Dictionary of column names and their corresponding types.
                 It is passed to CSV reader and for each column specified type auto
                 inference is disabled.
+            parse_options: Tells the parser how to process lines.
+                See https://arrow.apache.org/docs/python/generated/pyarrow.csv.ParseOptions.html

         Example:
             Reading a csv file:
@@ -1990,6 +2004,12 @@
         from pyarrow.dataset import CsvFileFormat
         from pyarrow.lib import type_for_alias

+        parse_options = parse_options or {}
+        if "delimiter" not in parse_options:
+            parse_options["delimiter"] = ","
+        if delimiter:
+            parse_options["delimiter"] = delimiter
+
         if column_types:
             column_types = {
                 name: type_for_alias(typ) if isinstance(typ, str) else typ
@@ -2017,7 +2037,7 @@
             msg = f"error parsing csv - incompatible output type {type(output)}"
             raise DatasetPrepareError(chain.name, msg)

-        parse_options = ParseOptions(
+        parse_options = ParseOptions(**parse_options)
         read_options = ReadOptions(column_names=column_names)
         convert_options = ConvertOptions(
             strings_can_be_null=True,
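Two user-facing additions in this file: `mutate` now accepts plain python constants, and `from_csv` gains a `parse_options` dict that is forwarded to `pyarrow.csv.ParseOptions`, with an explicit `delimiter` argument taking precedence. A sketch with hypothetical paths and column names:

    from datachain.lib.dc import DataChain

    # constants (bool, str, int, float) become typed literal columns
    chain = chain.mutate(origin="manual", revision=2, validated=True)

    # parse_options keys follow pyarrow.csv.ParseOptions; delimiter wins if both given
    reports = DataChain.from_csv(
        "s3://bucket/reports/",
        delimiter=";",
        parse_options={"ignore_empty_lines": True},
    )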
datachain/lib/file.py
CHANGED
@@ -190,6 +190,22 @@ class File(DataModel):
         self._catalog = None
         self._caching_enabled: bool = False

+    @classmethod
+    def upload(
+        cls, data: bytes, path: str, catalog: Optional["Catalog"] = None
+    ) -> "File":
+        if catalog is None:
+            from datachain.catalog.loader import get_catalog
+
+            catalog = get_catalog()
+
+        parent, name = posixpath.split(path)
+
+        client = catalog.get_client(parent)
+        file = client.upload(data, name)
+        file._set_stream(catalog)
+        return file
+
     @classmethod
     def _from_row(cls, row: "RowDict") -> "Self":
         return cls(**{key: row[key] for key in cls._datachain_column_types})
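`File.upload` is a new public entry point that routes bytes through the catalog's client for whatever storage the path points to. A minimal sketch with a hypothetical destination:

    from datachain.lib.file import File

    # splits the path into parent/name, uploads via the matching client,
    # and returns a File already attached to the catalog
    file = File.upload(b"column_a,column_b\n1,2\n", "s3://bucket/uploads/data.csv")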
datachain/lib/listing.py
CHANGED
@@ -1,3 +1,5 @@
+import logging
+import os
 import posixpath
 from collections.abc import Iterator
 from typing import TYPE_CHECKING, Callable, Optional, TypeVar
@@ -7,6 +9,7 @@ from sqlalchemy.sql.expression import true

 from datachain.asyn import iter_over_async
 from datachain.client import Client
+from datachain.error import REMOTE_ERRORS, ClientError
 from datachain.lib.file import File
 from datachain.query.schema import Column
 from datachain.sql.functions import path as pathfunc
@@ -22,6 +25,10 @@ LISTING_PREFIX = "lst__"  # listing datasets start with this name

 D = TypeVar("D", bound="DataChain")

+# Disable warnings for remote errors in clients
+logging.getLogger("aiobotocore.credentials").setLevel(logging.CRITICAL)
+logging.getLogger("gcsfs").setLevel(logging.CRITICAL)
+

 def list_bucket(uri: str, cache, client_config=None) -> Callable:
     """
@@ -90,6 +97,15 @@ def _isfile(client: "Client", path: str) -> bool:
     Returns True if uri points to a file
     """
     try:
+        if "://" in path:
+            # This makes sure that the uppercase scheme is converted to lowercase
+            scheme, path = path.split("://", 1)
+            path = f"{scheme.lower()}://{path}"
+
+        if os.name == "nt" and "*" in path:
+            # On Windows, the glob pattern "*" is not supported
+            return False
+
         info = client.fs.info(path)
         name = info.get("name")
         # case for special simulated directories on some clouds
@@ -99,21 +115,21 @@ def _isfile(client: "Client", path: str) -> bool:
             return False

         return info["type"] == "file"
-    except
+    except FileNotFoundError:
         return False
+    except REMOTE_ERRORS as e:
+        raise ClientError(
+            message=str(e),
+            error_code=getattr(e, "code", None),
+        ) from e


-def parse_listing_uri(uri: str,
+def parse_listing_uri(uri: str, client_config) -> tuple[str, str, str]:
     """
     Parsing uri and returns listing dataset name, listing uri and listing path
     """
     client_config = client_config or {}
-    client = Client.get_client(uri, cache, **client_config)
     storage_uri, path = Client.parse_url(uri)
-    telemetry.log_param("client", client.PREFIX)
-
-    if not uri.endswith("/") and _isfile(client, uri):
-        return None, f"{storage_uri}/{path.lstrip('/')}", path
     if uses_glob(path):
         lst_uri_path = posixpath.dirname(path)
     else:
@@ -157,13 +173,15 @@ def get_listing(
     client_config = catalog.client_config

     client = Client.get_client(uri, cache, **client_config)
-
-    listing = None
+    telemetry.log_param("client", client.PREFIX)

-    #
-    if not
-
+    # we don't want to use cached dataset (e.g. for a single file listing)
+    if not uri.endswith("/") and _isfile(client, uri):
+        storage_uri, path = Client.parse_url(uri)
+        return None, f"{storage_uri}/{path.lstrip('/')}", path, False

+    ds_name, list_uri, list_path = parse_listing_uri(uri, client_config)
+    listing = None
     listings = [
         ls for ls in catalog.listings() if not ls.is_expired and ls.contains(ds_name)
     ]
datachain/lib/pytorch.py
CHANGED
@@ -23,7 +23,7 @@ from datachain.query.dataset import get_download_callback
 if TYPE_CHECKING:
     from torchvision.transforms.v2 import Transform

-    from datachain.cache import
+    from datachain.cache import Cache


 logger = logging.getLogger("datachain")
datachain/lib/udf.py
CHANGED
@@ -32,7 +32,7 @@ if TYPE_CHECKING:

     from typing_extensions import Self

-    from datachain.cache import
+    from datachain.cache import Cache
     from datachain.catalog import Catalog
     from datachain.lib.signal_schema import SignalSchema
     from datachain.lib.udf_signature import UdfSignature
datachain/listing.py
CHANGED
@@ -2,7 +2,6 @@ import glob
 import os
 from collections.abc import Iterable, Iterator
 from functools import cached_property
-from itertools import zip_longest
 from typing import TYPE_CHECKING, Optional

 from sqlalchemy import Column
@@ -101,11 +100,8 @@ class Listing:
         copy_to_filename: Optional[str],
         recursive=False,
         copy_dir_contents=False,
-        relative_path=None,
-        from_edatachain=False,
         from_dataset=False,
     ) -> list[NodeWithPath]:
-        rel_path_elements = relative_path.split("/") if relative_path else []
         all_nodes: list[NodeWithPath] = []
         for src in sources:
             node = src.node
@@ -119,15 +115,7 @@ class Listing:
                 )
             else:
                 node_path = []
-                if
-                    for rpe, npe in zip_longest(
-                        rel_path_elements, node.path.split("/")
-                    ):
-                        if rpe == npe:
-                            continue
-                        if npe:
-                            node_path.append(npe)
-                elif copy_to_filename:
+                if copy_to_filename:
                     node_path = [os.path.basename(copy_to_filename)]
                 elif from_dataset:
                     node_path = [
datachain/node.py
CHANGED
@@ -84,18 +84,6 @@ class Node:
         fd.write(f" size: {self.size}\n")
         return size

-    def get_metafile_data(self, path: str):
-        data: dict[str, Any] = {
-            "name": path,
-            "etag": self.etag,
-        }
-        version = self.version
-        if version:
-            data["version"] = version
-        data["last_modified"] = time_to_str(self.last_modified)
-        data["size"] = self.size
-        return data
-
     @property
     def full_path(self) -> str:
         if self.is_dir and self.path:
@@ -181,9 +169,6 @@ class NodeWithPath:
     def append_to_file(self, fd):
         return self.n.append_to_file(fd, "/".join(self.path))

-    def get_metafile_data(self):
-        return self.n.get_metafile_data("/".join(self.path))
-
     @property
     def full_path(self) -> str:
         path = "/".join(self.path)
datachain/nodes_fetcher.py
CHANGED
@@ -5,7 +5,7 @@ from typing import TYPE_CHECKING
 from datachain.nodes_thread_pool import NodesThreadPool

 if TYPE_CHECKING:
-    from datachain.cache import
+    from datachain.cache import Cache
     from datachain.client.fsspec import Client
     from datachain.node import Node

@@ -13,7 +13,7 @@ logger = logging.getLogger("datachain")


 class NodesFetcher(NodesThreadPool):
-    def __init__(self, client: "Client", max_threads: int, cache: "
+    def __init__(self, client: "Client", max_threads: int, cache: "Cache"):
         super().__init__(max_threads)
         self.client = client
         self.cache = cache