datachain 0.7.7__py3-none-any.whl → 0.7.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

datachain/cli.py CHANGED
@@ -16,7 +16,7 @@ from tabulate import tabulate
  from datachain import Session, utils
  from datachain.cli_utils import BooleanOptionalAction, CommaSeparatedArgs, KeyValueArgs
  from datachain.config import Config
- from datachain.error import DataChainError
+ from datachain.error import DataChainError, DatasetNotFoundError
  from datachain.lib.dc import DataChain
  from datachain.studio import (
      edit_studio_dataset,
@@ -1056,7 +1056,10 @@ def rm_dataset(
      all, local, studio = _determine_flavors(studio, local, all, token)

      if all or local:
-         catalog.remove_dataset(name, version=version, force=force)
+         try:
+             catalog.remove_dataset(name, version=version, force=force)
+         except DatasetNotFoundError:
+             print("Dataset not found in local", file=sys.stderr)

      if (all or studio) and token:
          remove_studio_dataset(team, name, version, force)
@@ -1077,7 +1080,10 @@ def edit_dataset(
      all, local, studio = _determine_flavors(studio, local, all, token)

      if all or local:
-         catalog.edit_dataset(name, new_name, description, labels)
+         try:
+             catalog.edit_dataset(name, new_name, description, labels)
+         except DatasetNotFoundError:
+             print("Dataset not found in local", file=sys.stderr)

      if (all or studio) and token:
          edit_studio_dataset(team, name, new_name, description, labels)
datachain/data_storage/metastore.py CHANGED
@@ -725,9 +725,10 @@ class AbstractDBMetastore(AbstractMetastore):

      def list_datasets(self) -> Iterator["DatasetListRecord"]:
          """Lists all datasets."""
-         yield from self._parse_dataset_list(
-             self.db.execute(self._base_list_datasets_query())
+         query = self._base_list_datasets_query().order_by(
+             self._datasets.c.name, self._datasets_versions.c.version
          )
+         yield from self._parse_dataset_list(self.db.execute(query))

      def list_datasets_by_prefix(
          self, prefix: str, conn=None
datachain/func/__init__.py CHANGED
@@ -17,6 +17,7 @@ from .aggregate import (
  )
  from .array import cosine_distance, euclidean_distance, length, sip_hash_64
  from .conditional import greatest, least
+ from .numeric import bit_and, bit_or, bit_xor, int_hash_64
  from .random import rand
  from .window import window

@@ -24,6 +25,9 @@ __all__ = [
      "any_value",
      "array",
      "avg",
+     "bit_and",
+     "bit_or",
+     "bit_xor",
      "case",
      "collect",
      "concat",
@@ -33,6 +37,7 @@ __all__ = [
      "euclidean_distance",
      "first",
      "greatest",
+     "int_hash_64",
      "least",
      "length",
      "literal",
datachain/func/func.py CHANGED
@@ -2,13 +2,15 @@ import inspect
  from collections.abc import Sequence
  from typing import TYPE_CHECKING, Any, Callable, Optional, Union

- from sqlalchemy import BindParameter, Case, ColumnElement, desc
+ from sqlalchemy import BindParameter, Case, ColumnElement, Integer, cast, desc
  from sqlalchemy.ext.hybrid import Comparator
+ from sqlalchemy.sql import func as sa_func

  from datachain.lib.convert.python_to_sql import python_to_sql
  from datachain.lib.convert.sql_to_python import sql_to_python
  from datachain.lib.utils import DataChainColumnError, DataChainParamsError
  from datachain.query.schema import Column, ColumnMeta
+ from datachain.sql.functions import numeric

  from .base import Function

@@ -98,94 +100,232 @@ class Func(Function):
          return list[col_type] if self.is_array else col_type  # type: ignore[valid-type]

      def __add__(self, other: Union[ColT, float]) -> "Func":
-         return math_add(self, other)
+         if isinstance(other, (int, float)):
+             return Func("add", lambda a: a + other, [self])
+         return Func("add", lambda a1, a2: a1 + a2, [self, other])

      def __radd__(self, other: Union[ColT, float]) -> "Func":
-         return math_add(other, self)
+         if isinstance(other, (int, float)):
+             return Func("add", lambda a: other + a, [self])
+         return Func("add", lambda a1, a2: a1 + a2, [other, self])

      def __sub__(self, other: Union[ColT, float]) -> "Func":
-         return math_sub(self, other)
+         if isinstance(other, (int, float)):
+             return Func("sub", lambda a: a - other, [self])
+         return Func("sub", lambda a1, a2: a1 - a2, [self, other])

      def __rsub__(self, other: Union[ColT, float]) -> "Func":
-         return math_sub(other, self)
+         if isinstance(other, (int, float)):
+             return Func("sub", lambda a: other - a, [self])
+         return Func("sub", lambda a1, a2: a1 - a2, [other, self])

      def __mul__(self, other: Union[ColT, float]) -> "Func":
-         return math_mul(self, other)
+         if isinstance(other, (int, float)):
+             return Func("mul", lambda a: a * other, [self])
+         return Func("mul", lambda a1, a2: a1 * a2, [self, other])

      def __rmul__(self, other: Union[ColT, float]) -> "Func":
-         return math_mul(other, self)
+         if isinstance(other, (int, float)):
+             return Func("mul", lambda a: other * a, [self])
+         return Func("mul", lambda a1, a2: a1 * a2, [other, self])

      def __truediv__(self, other: Union[ColT, float]) -> "Func":
-         return math_truediv(self, other)
+         if isinstance(other, (int, float)):
+             return Func("div", lambda a: _truediv(a, other), [self], result_type=float)
+         return Func(
+             "div", lambda a1, a2: _truediv(a1, a2), [self, other], result_type=float
+         )

      def __rtruediv__(self, other: Union[ColT, float]) -> "Func":
-         return math_truediv(other, self)
+         if isinstance(other, (int, float)):
+             return Func("div", lambda a: _truediv(other, a), [self], result_type=float)
+         return Func(
+             "div", lambda a1, a2: _truediv(a1, a2), [other, self], result_type=float
+         )

      def __floordiv__(self, other: Union[ColT, float]) -> "Func":
-         return math_floordiv(self, other)
+         if isinstance(other, (int, float)):
+             return Func(
+                 "floordiv", lambda a: _floordiv(a, other), [self], result_type=int
+             )
+         return Func(
+             "floordiv", lambda a1, a2: _floordiv(a1, a2), [self, other], result_type=int
+         )

      def __rfloordiv__(self, other: Union[ColT, float]) -> "Func":
-         return math_floordiv(other, self)
+         if isinstance(other, (int, float)):
+             return Func(
+                 "floordiv", lambda a: _floordiv(other, a), [self], result_type=int
+             )
+         return Func(
+             "floordiv", lambda a1, a2: _floordiv(a1, a2), [other, self], result_type=int
+         )

      def __mod__(self, other: Union[ColT, float]) -> "Func":
-         return math_mod(self, other)
+         if isinstance(other, (int, float)):
+             return Func("mod", lambda a: a % other, [self], result_type=int)
+         return Func("mod", lambda a1, a2: a1 % a2, [self, other], result_type=int)

      def __rmod__(self, other: Union[ColT, float]) -> "Func":
-         return math_mod(other, self)
-
-     def __pow__(self, other: Union[ColT, float]) -> "Func":
-         return math_pow(self, other)
-
-     def __rpow__(self, other: Union[ColT, float]) -> "Func":
-         return math_pow(other, self)
-
-     def __lshift__(self, other: Union[ColT, float]) -> "Func":
-         return math_lshift(self, other)
-
-     def __rlshift__(self, other: Union[ColT, float]) -> "Func":
-         return math_lshift(other, self)
-
-     def __rshift__(self, other: Union[ColT, float]) -> "Func":
-         return math_rshift(self, other)
-
-     def __rrshift__(self, other: Union[ColT, float]) -> "Func":
-         return math_rshift(other, self)
+         if isinstance(other, (int, float)):
+             return Func("mod", lambda a: other % a, [self], result_type=int)
+         return Func("mod", lambda a1, a2: a1 % a2, [other, self], result_type=int)

      def __and__(self, other: Union[ColT, float]) -> "Func":
-         return math_and(self, other)
+         if isinstance(other, (int, float)):
+             return Func(
+                 "and", lambda a: numeric.bit_and(a, other), [self], result_type=int
+             )
+         return Func(
+             "and",
+             lambda a1, a2: numeric.bit_and(a1, a2),
+             [self, other],
+             result_type=int,
+         )

      def __rand__(self, other: Union[ColT, float]) -> "Func":
-         return math_and(other, self)
+         if isinstance(other, (int, float)):
+             return Func(
+                 "and", lambda a: numeric.bit_and(other, a), [self], result_type=int
+             )
+         return Func(
+             "and",
+             lambda a1, a2: numeric.bit_and(a1, a2),
+             [other, self],
+             result_type=int,
+         )

      def __or__(self, other: Union[ColT, float]) -> "Func":
-         return math_or(self, other)
+         if isinstance(other, (int, float)):
+             return Func(
+                 "or", lambda a: numeric.bit_or(a, other), [self], result_type=int
+             )
+         return Func(
+             "or", lambda a1, a2: numeric.bit_or(a1, a2), [self, other], result_type=int
+         )

      def __ror__(self, other: Union[ColT, float]) -> "Func":
-         return math_or(other, self)
+         if isinstance(other, (int, float)):
+             return Func(
+                 "or", lambda a: numeric.bit_or(other, a), [self], result_type=int
+             )
+         return Func(
+             "or", lambda a1, a2: numeric.bit_or(a1, a2), [other, self], result_type=int
+         )

      def __xor__(self, other: Union[ColT, float]) -> "Func":
-         return math_xor(self, other)
+         if isinstance(other, (int, float)):
+             return Func(
+                 "xor", lambda a: numeric.bit_xor(a, other), [self], result_type=int
+             )
+         return Func(
+             "xor",
+             lambda a1, a2: numeric.bit_xor(a1, a2),
+             [self, other],
+             result_type=int,
+         )

      def __rxor__(self, other: Union[ColT, float]) -> "Func":
-         return math_xor(other, self)
+         if isinstance(other, (int, float)):
+             return Func(
+                 "xor", lambda a: numeric.bit_xor(other, a), [self], result_type=int
+             )
+         return Func(
+             "xor",
+             lambda a1, a2: numeric.bit_xor(a1, a2),
+             [other, self],
+             result_type=int,
+         )
+
+     def __rshift__(self, other: Union[ColT, float]) -> "Func":
+         if isinstance(other, (int, float)):
+             return Func(
+                 "rshift",
+                 lambda a: numeric.bit_rshift(a, other),
+                 [self],
+                 result_type=int,
+             )
+         return Func(
+             "rshift",
+             lambda a1, a2: numeric.bit_rshift(a1, a2),
+             [self, other],
+             result_type=int,
+         )
+
+     def __rrshift__(self, other: Union[ColT, float]) -> "Func":
+         if isinstance(other, (int, float)):
+             return Func(
+                 "rshift",
+                 lambda a: numeric.bit_rshift(other, a),
+                 [self],
+                 result_type=int,
+             )
+         return Func(
+             "rshift",
+             lambda a1, a2: numeric.bit_rshift(a1, a2),
+             [other, self],
+             result_type=int,
+         )
+
+     def __lshift__(self, other: Union[ColT, float]) -> "Func":
+         if isinstance(other, (int, float)):
+             return Func(
+                 "lshift",
+                 lambda a: numeric.bit_lshift(a, other),
+                 [self],
+                 result_type=int,
+             )
+         return Func(
+             "lshift",
+             lambda a1, a2: numeric.bit_lshift(a1, a2),
+             [self, other],
+             result_type=int,
+         )
+
+     def __rlshift__(self, other: Union[ColT, float]) -> "Func":
+         if isinstance(other, (int, float)):
+             return Func(
+                 "lshift",
+                 lambda a: numeric.bit_lshift(other, a),
+                 [self],
+                 result_type=int,
+             )
+         return Func(
+             "lshift",
+             lambda a1, a2: numeric.bit_lshift(a1, a2),
+             [other, self],
+             result_type=int,
+         )

      def __lt__(self, other: Union[ColT, float]) -> "Func":
-         return math_lt(self, other)
+         if isinstance(other, (int, float)):
+             return Func("lt", lambda a: a < other, [self], result_type=bool)
+         return Func("lt", lambda a1, a2: a1 < a2, [self, other], result_type=bool)

      def __le__(self, other: Union[ColT, float]) -> "Func":
-         return math_le(self, other)
+         if isinstance(other, (int, float)):
+             return Func("le", lambda a: a <= other, [self], result_type=bool)
+         return Func("le", lambda a1, a2: a1 <= a2, [self, other], result_type=bool)

      def __eq__(self, other):
-         return math_eq(self, other)
+         if isinstance(other, (int, float)):
+             return Func("eq", lambda a: a == other, [self], result_type=bool)
+         return Func("eq", lambda a1, a2: a1 == a2, [self, other], result_type=bool)

      def __ne__(self, other):
-         return math_ne(self, other)
+         if isinstance(other, (int, float)):
+             return Func("ne", lambda a: a != other, [self], result_type=bool)
+         return Func("ne", lambda a1, a2: a1 != a2, [self, other], result_type=bool)

      def __gt__(self, other: Union[ColT, float]) -> "Func":
-         return math_gt(self, other)
+         if isinstance(other, (int, float)):
+             return Func("gt", lambda a: a > other, [self], result_type=bool)
+         return Func("gt", lambda a1, a2: a1 > a2, [self, other], result_type=bool)

      def __ge__(self, other: Union[ColT, float]) -> "Func":
-         return math_ge(self, other)
+         if isinstance(other, (int, float)):
+             return Func("ge", lambda a: a >= other, [self], result_type=bool)
+         return Func("ge", lambda a1, a2: a1 >= a2, [self, other], result_type=bool)

      def label(self, label: str) -> "Func":
          return Func(
@@ -283,107 +423,12 @@ def get_db_col_type(signals_schema: "SignalSchema", col: ColT) -> "DataType":
      )


- def math_func(
-     name: str,
-     inner: Callable,
-     params: Sequence[Union[ColT, float]],
-     result_type: Optional["DataType"] = None,
- ) -> Func:
-     """Returns math function from the columns."""
-     cols, args = [], []
-     for arg in params:
-         if isinstance(arg, (int, float)):
-             args.append(arg)
-         else:
-             cols.append(arg)
-     return Func(name, inner, cols=cols, args=args, result_type=result_type)
-
-
- def math_add(*args: Union[ColT, float]) -> Func:
-     """Computes the sum of the column."""
-     return math_func("add", lambda a1, a2: a1 + a2, args)
-
-
- def math_sub(*args: Union[ColT, float]) -> Func:
-     """Computes the diff of the column."""
-     return math_func("sub", lambda a1, a2: a1 - a2, args)
-
-
- def math_mul(*args: Union[ColT, float]) -> Func:
-     """Computes the product of the column."""
-     return math_func("mul", lambda a1, a2: a1 * a2, args)
-
-
- def math_truediv(*args: Union[ColT, float]) -> Func:
-     """Computes the division of the column."""
-     return math_func("div", lambda a1, a2: a1 / a2, args, result_type=float)
-
-
- def math_floordiv(*args: Union[ColT, float]) -> Func:
-     """Computes the floor division of the column."""
-     return math_func("floordiv", lambda a1, a2: a1 // a2, args, result_type=float)
-
-
- def math_mod(*args: Union[ColT, float]) -> Func:
-     """Computes the modulo of the column."""
-     return math_func("mod", lambda a1, a2: a1 % a2, args, result_type=float)
-
-
- def math_pow(*args: Union[ColT, float]) -> Func:
-     """Computes the power of the column."""
-     return math_func("pow", lambda a1, a2: a1**a2, args, result_type=float)
-
-
- def math_lshift(*args: Union[ColT, float]) -> Func:
-     """Computes the left shift of the column."""
-     return math_func("lshift", lambda a1, a2: a1 << a2, args, result_type=int)
-
-
- def math_rshift(*args: Union[ColT, float]) -> Func:
-     """Computes the right shift of the column."""
-     return math_func("rshift", lambda a1, a2: a1 >> a2, args, result_type=int)
-
-
- def math_and(*args: Union[ColT, float]) -> Func:
-     """Computes the logical AND of the column."""
-     return math_func("and", lambda a1, a2: a1 & a2, args, result_type=bool)
-
-
- def math_or(*args: Union[ColT, float]) -> Func:
-     """Computes the logical OR of the column."""
-     return math_func("or", lambda a1, a2: a1 | a2, args, result_type=bool)
-
-
- def math_xor(*args: Union[ColT, float]) -> Func:
-     """Computes the logical XOR of the column."""
-     return math_func("xor", lambda a1, a2: a1 ^ a2, args, result_type=bool)
-
-
- def math_lt(*args: Union[ColT, float]) -> Func:
-     """Computes the less than comparison of the column."""
-     return math_func("lt", lambda a1, a2: a1 < a2, args, result_type=bool)
-
-
- def math_le(*args: Union[ColT, float]) -> Func:
-     """Computes the less than or equal comparison of the column."""
-     return math_func("le", lambda a1, a2: a1 <= a2, args, result_type=bool)
-
-
- def math_eq(*args: Union[ColT, float]) -> Func:
-     """Computes the equality comparison of the column."""
-     return math_func("eq", lambda a1, a2: a1 == a2, args, result_type=bool)
-
-
- def math_ne(*args: Union[ColT, float]) -> Func:
-     """Computes the inequality comparison of the column."""
-     return math_func("ne", lambda a1, a2: a1 != a2, args, result_type=bool)
-
-
- def math_gt(*args: Union[ColT, float]) -> Func:
-     """Computes the greater than comparison of the column."""
-     return math_func("gt", lambda a1, a2: a1 > a2, args, result_type=bool)
+ def _truediv(a, b):
+     # Using sqlalchemy.sql.func.divide here instead of / operator
+     # because of a bug in ClickHouse SQLAlchemy dialect
+     # See https://github.com/xzkostyan/clickhouse-sqlalchemy/issues/335
+     return sa_func.divide(a, b)


- def math_ge(*args: Union[ColT, float]) -> Func:
-     """Computes the greater than or equal comparison of the column."""
-     return math_func("ge", lambda a1, a2: a1 >= a2, args, result_type=bool)
+ def _floordiv(a, b):
+     return cast(_truediv(a, b), Integer)
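
With the math_* helpers gone, every arithmetic, bitwise, and comparison operator on a Func now builds the next Func inline: constants are captured in the lambda, while columns or other Funcs are passed as inputs. A small sketch of what this enables, assuming `func.rand()` returns a Func (as its export alongside these helpers suggests):

```py
from datachain import func

r = func.rand()
bucket = (r % 100).label("bucket")    # modulo against a constant, int result
masked = (r & 0xFF).label("masked")   # routed through numeric.bit_and
shifted = (r >> 3).label("shifted")   # routed through numeric.bit_rshift
ratio = (r / 7).label("ratio")        # uses sa_func.divide via _truediv
```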
datachain/func/numeric.py ADDED
@@ -0,0 +1,162 @@
+ from typing import Union
+
+ from datachain.sql.functions import numeric
+
+ from .func import ColT, Func
+
+
+ def bit_and(*args: Union[ColT, int]) -> Func:
+     """
+     Computes the bitwise AND operation between two values.
+
+     Args:
+         args (str | int): Two values to compute the bitwise AND operation between.
+             If a string is provided, it is assumed to be the name of the column vector.
+             If an integer is provided, it is assumed to be a constant value.
+
+     Returns:
+         Func: A Func object that represents the bitwise AND function.
+
+     Example:
+         ```py
+         dc.mutate(
+             xor1=func.bit_and("signal.values", 0x0F),
+         )
+         ```
+
+     Notes:
+         - Result column will always be of type int.
+     """
+     cols, func_args = [], []
+     for arg in args:
+         if isinstance(arg, int):
+             func_args.append(arg)
+         else:
+             cols.append(arg)
+
+     if len(cols) + len(func_args) != 2:
+         raise ValueError("bit_and() requires exactly two arguments")
+
+     return Func(
+         "bit_and",
+         inner=numeric.bit_and,
+         cols=cols,
+         args=func_args,
+         result_type=int,
+     )
+
+
+ def bit_or(*args: Union[ColT, int]) -> Func:
+     """
+     Computes the bitwise OR operation between two values.
+
+     Args:
+         args (str | int): Two values to compute the bitwise OR operation between.
+             If a string is provided, it is assumed to be the name of the column vector.
+             If an integer is provided, it is assumed to be a constant value.
+
+     Returns:
+         Func: A Func object that represents the bitwise OR function.
+
+     Example:
+         ```py
+         dc.mutate(
+             xor1=func.bit_or("signal.values", 0x0F),
+         )
+         ```
+
+     Notes:
+         - Result column will always be of type int.
+     """
+     cols, func_args = [], []
+     for arg in args:
+         if isinstance(arg, int):
+             func_args.append(arg)
+         else:
+             cols.append(arg)
+
+     if len(cols) + len(func_args) != 2:
+         raise ValueError("bit_or() requires exactly two arguments")
+
+     return Func(
+         "bit_or",
+         inner=numeric.bit_or,
+         cols=cols,
+         args=func_args,
+         result_type=int,
+     )
+
+
+ def bit_xor(*args: Union[ColT, int]) -> Func:
+     """
+     Computes the bitwise XOR operation between two values.
+
+     Args:
+         args (str | int): Two values to compute the bitwise XOR operation between.
+             If a string is provided, it is assumed to be the name of the column vector.
+             If an integer is provided, it is assumed to be a constant value.
+
+     Returns:
+         Func: A Func object that represents the bitwise XOR function.
+
+     Example:
+         ```py
+         dc.mutate(
+             xor1=func.bit_xor("signal.values", 0x0F),
+         )
+         ```
+
+     Notes:
+         - Result column will always be of type int.
+     """
+     cols, func_args = [], []
+     for arg in args:
+         if isinstance(arg, int):
+             func_args.append(arg)
+         else:
+             cols.append(arg)
+
+     if len(cols) + len(func_args) != 2:
+         raise ValueError("bit_xor() requires exactly two arguments")
+
+     return Func(
+         "bit_xor",
+         inner=numeric.bit_xor,
+         cols=cols,
+         args=func_args,
+         result_type=int,
+     )
+
+
+ def int_hash_64(col: Union[ColT, int]) -> Func:
+     """
+     Returns the 64-bit hash of an integer.
+
+     Args:
+         col (str | int): Integer to compute the hash of.
+             If a string is provided, it is assumed to be the name of the column.
+             If an int is provided, it is assumed to be an int literal.
+             If a Func is provided, it is assumed to be a function returning an int.
+
+     Returns:
+         Func: A Func object that represents the 64-bit hash function.
+
+     Example:
+         ```py
+         dc.mutate(
+             val_hash=func.int_hash_64("val"),
+         )
+         ```
+
+     Note:
+         - Result column will always be of type int.
+     """
+     cols, args = [], []
+     if isinstance(col, int):
+         args.append(col)
+     else:
+         cols.append(col)
+
+     return Func(
+         "int_hash_64", inner=numeric.int_hash_64, cols=cols, args=args, result_type=int
+     )
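
Each of the two-argument helpers accepts any mix of column names and integer constants, but the total must be exactly two; anything else raises ValueError. A short illustration (the column names are made up):

```py
from datachain.func import bit_or

bit_or("flags.a", "flags.b")   # two columns
bit_or("flags.a", 0b1000)      # one column and one constant
# bit_or("flags.a")            # would raise ValueError: requires exactly two arguments
```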
datachain/lib/dc.py CHANGED
@@ -1446,6 +1446,7 @@ class DataChain:
              tokenizer=tokenizer,
              tokenizer_kwargs=tokenizer_kwargs,
              num_samples=num_samples,
+             dc_settings=chain._settings,
          )

      def remove_file_signals(self) -> "Self":  # noqa: D102
datachain/lib/pytorch.py CHANGED
@@ -10,8 +10,10 @@ from torchvision.transforms import v2
  from tqdm import tqdm

  from datachain import Session
+ from datachain.asyn import AsyncMapper
  from datachain.catalog import Catalog, get_catalog
  from datachain.lib.dc import DataChain
+ from datachain.lib.settings import Settings
  from datachain.lib.text import convert_text

  if TYPE_CHECKING:
@@ -30,6 +32,8 @@ def label_to_int(value: str, classes: list) -> int:


  class PytorchDataset(IterableDataset):
+     prefetch: int = 2
+
      def __init__(
          self,
          name: str,
@@ -39,6 +43,7 @@ class PytorchDataset(IterableDataset):
          tokenizer: Optional[Callable] = None,
          tokenizer_kwargs: Optional[dict[str, Any]] = None,
          num_samples: int = 0,
+         dc_settings: Optional[Settings] = None,
      ):
          """
          Pytorch IterableDataset that streams DataChain datasets.
@@ -66,6 +71,11 @@ class PytorchDataset(IterableDataset):
              catalog = get_catalog()
          self._init_catalog(catalog)

+         dc_settings = dc_settings or Settings()
+         self.cache = dc_settings.cache
+         if (prefetch := dc_settings.prefetch) is not None:
+             self.prefetch = prefetch
+
      def _init_catalog(self, catalog: "Catalog"):
          # For compatibility with multiprocessing,
          # we can only store params in __init__(), as Catalog isn't picklable
@@ -82,51 +92,58 @@ class PytorchDataset(IterableDataset):
          wh = wh_cls(*wh_args, **wh_kwargs)
          return Catalog(ms, wh, **self._catalog_params)

-     def __iter__(self) -> Iterator[Any]:
-         if self.catalog is None:
-             self.catalog = self._get_catalog()
-         session = Session.get(catalog=self.catalog)
-         total_rank, total_workers = self.get_rank_and_workers()
+     def _rows_iter(self, total_rank: int, total_workers: int):
+         catalog = self._get_catalog()
+         session = Session("PyTorch", catalog=catalog)
          ds = DataChain.from_dataset(
              name=self.name, version=self.version, session=session
-         )
+         ).settings(cache=self.cache, prefetch=self.prefetch)
          ds = ds.remove_file_signals()

          if self.num_samples > 0:
              ds = ds.sample(self.num_samples)
          ds = ds.chunk(total_rank, total_workers)
+         yield from ds.collect()
+
+     def __iter__(self) -> Iterator[Any]:
+         total_rank, total_workers = self.get_rank_and_workers()
+         rows = self._rows_iter(total_rank, total_workers)
+         if self.prefetch > 0:
+             from datachain.lib.udf import _prefetch_input
+
+             rows = AsyncMapper(_prefetch_input, rows, workers=self.prefetch).iterate()
+
          desc = f"Parsed PyTorch dataset for rank={total_rank} worker"
-         with tqdm(desc=desc, unit=" rows") as pbar:
-             for row_features in ds.collect():
-                 row = []
-                 for fr in row_features:
-                     if hasattr(fr, "read"):
-                         row.append(fr.read())  # type: ignore[unreachable]
-                     else:
-                         row.append(fr)
-                 # Apply transforms
-                 if self.transform:
-                     try:
-                         if isinstance(self.transform, v2.Transform):
-                             row = self.transform(row)
-                         for i, val in enumerate(row):
-                             if isinstance(val, Image.Image):
-                                 row[i] = self.transform(val)
-                     except ValueError:
-                         logger.warning(
-                             "Skipping transform due to unsupported data types."
-                         )
-                         self.transform = None
-                 if self.tokenizer:
-                     for i, val in enumerate(row):
-                         if isinstance(val, str) or (
-                             isinstance(val, list) and isinstance(val[0], str)
-                         ):
-                             row[i] = convert_text(
-                                 val, self.tokenizer, self.tokenizer_kwargs
-                             ).squeeze(0)  # type: ignore[union-attr]
-                 yield row
-                 pbar.update(1)
+         with tqdm(rows, desc=desc, unit=" rows", position=total_rank) as rows_it:
+             yield from map(self._process_row, rows_it)
+
+     def _process_row(self, row_features):
+         row = []
+         for fr in row_features:
+             if hasattr(fr, "read"):
+                 row.append(fr.read())  # type: ignore[unreachable]
+             else:
+                 row.append(fr)
+         # Apply transforms
+         if self.transform:
+             try:
+                 if isinstance(self.transform, v2.Transform):
+                     row = self.transform(row)
+                 for i, val in enumerate(row):
+                     if isinstance(val, Image.Image):
+                         row[i] = self.transform(val)
+             except ValueError:
+                 logger.warning("Skipping transform due to unsupported data types.")
+                 self.transform = None
+         if self.tokenizer:
+             for i, val in enumerate(row):
+                 if isinstance(val, str) or (
+                     isinstance(val, list) and isinstance(val[0], str)
+                 ):
+                     row[i] = convert_text(
+                         val, self.tokenizer, self.tokenizer_kwargs
+                     ).squeeze(0)  # type: ignore[union-attr]
+         return row

      @staticmethod
      def get_rank_and_workers() -> tuple[int, int]:
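
Because the chain's settings are now forwarded into PytorchDataset (the dc.py hunk above) and rows are prefetched through AsyncMapper, cache and prefetch behaviour can be tuned from the chain itself. A hedged sketch, assuming `to_pytorch()` is the method that builds a PytorchDataset and that a saved dataset named "my-dataset" exists:

```py
from torch.utils.data import DataLoader

from datachain.lib.dc import DataChain

chain = DataChain.from_dataset(name="my-dataset").settings(cache=True, prefetch=4)
loader = DataLoader(chain.to_pytorch(), batch_size=16, num_workers=2)

for batch in loader:
    ...  # rows are prefetched asynchronously inside each worker process
```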
datachain/remote/studio.py CHANGED
@@ -119,18 +119,27 @@ class StudioClient:
              "\tpip install 'datachain[remote]'"
          ) from None

-     def _send_request_msgpack(self, route: str, data: dict[str, Any]) -> Response[Any]:
+     def _send_request_msgpack(
+         self, route: str, data: dict[str, Any], method: Optional[str] = "POST"
+     ) -> Response[Any]:
          import msgpack
          import requests

-         response = requests.post(
-             f"{self.url}/{route}",
-             json={**data, "team_name": self.team},
+         kwargs = (
+             {"params": {**data, "team_name": self.team}}
+             if method == "GET"
+             else {"json": {**data, "team_name": self.team}}
+         )
+
+         response = requests.request(
+             method=method,  # type: ignore[arg-type]
+             url=f"{self.url}/{route}",
              headers={
                  "Content-Type": "application/json",
                  "Authorization": f"token {self.token}",
              },
              timeout=self.timeout,
+             **kwargs,  # type: ignore[arg-type]
          )
          ok = response.ok
          if not ok:
@@ -148,7 +157,9 @@ class StudioClient:
          return Response(response_data, ok, message)

      @retry_with_backoff(retries=5)
-     def _send_request(self, route: str, data: dict[str, Any]) -> Response[Any]:
+     def _send_request(
+         self, route: str, data: dict[str, Any], method: Optional[str] = "POST"
+     ) -> Response[Any]:
          """
          Function that communicate Studio API.
          It will raise an exception, and try to retry, if 5xx status code is
@@ -157,14 +168,21 @@ class StudioClient:
          """
          import requests

-         response = requests.post(
-             f"{self.url}/{route}",
-             json={**data, "team_name": self.team},
+         kwargs = (
+             {"params": {**data, "team_name": self.team}}
+             if method == "GET"
+             else {"json": {**data, "team_name": self.team}}
+         )
+
+         response = requests.request(
+             method=method,  # type: ignore[arg-type]
+             url=f"{self.url}/{route}",
              headers={
                  "Content-Type": "application/json",
                  "Authorization": f"token {self.token}",
              },
              timeout=self.timeout,
+             **kwargs,  # type: ignore[arg-type]
          )
          try:
              response.raise_for_status()
@@ -222,7 +240,7 @@ class StudioClient:
              yield path, response

      def ls_datasets(self) -> Response[LsData]:
-         return self._send_request("datachain/ls-datasets", {})
+         return self._send_request("datachain/datasets", {}, method="GET")

      def edit_dataset(
          self,
@@ -232,20 +250,14 @@ class StudioClient:
          labels: Optional[list[str]] = None,
      ) -> Response[DatasetInfoData]:
          body = {
+             "new_name": new_name,
              "dataset_name": name,
+             "description": description,
+             "labels": labels,
          }

-         if new_name is not None:
-             body["new_name"] = new_name
-
-         if description is not None:
-             body["description"] = description
-
-         if labels is not None:
-             body["labels"] = labels  # type: ignore[assignment]
-
          return self._send_request(
-             "datachain/edit-dataset",
+             "datachain/datasets",
              body,
          )

@@ -256,12 +268,13 @@ class StudioClient:
          force: Optional[bool] = False,
      ) -> Response[DatasetInfoData]:
          return self._send_request(
-             "datachain/rm-dataset",
+             "datachain/datasets",
              {
                  "dataset_name": name,
                  "version": version,
                  "force": force,
              },
+             method="DELETE",
          )

      def dataset_info(self, name: str) -> Response[DatasetInfoData]:
@@ -272,7 +285,9 @@ class StudioClient:

              return dataset_info

-         response = self._send_request("datachain/dataset-info", {"dataset_name": name})
+         response = self._send_request(
+             "datachain/datasets/info", {"dataset_name": name}, method="GET"
+         )
          if response.ok:
              response.data = _parse_dataset_info(response.data)
          return response
@@ -282,14 +297,16 @@ class StudioClient:
      ) -> Response[DatasetRowsData]:
          req_data = {"dataset_name": name, "dataset_version": version}
          return self._send_request_msgpack(
-             "datachain/dataset-rows",
+             "datachain/datasets/rows",
              {**req_data, "offset": offset, "limit": DATASET_ROWS_CHUNK_SIZE},
+             method="GET",
          )

      def dataset_stats(self, name: str, version: int) -> Response[DatasetStatsData]:
          response = self._send_request(
-             "datachain/dataset-stats",
+             "datachain/datasets/stats",
              {"dataset_name": name, "dataset_version": version},
+             method="GET",
          )
          if response.ok:
              response.data = DatasetStats(**response.data)
@@ -299,16 +316,18 @@ class StudioClient:
          self, name: str, version: int
      ) -> Response[DatasetExportSignedUrls]:
          return self._send_request(
-             "datachain/dataset-export",
+             "datachain/datasets/export",
              {"dataset_name": name, "dataset_version": version},
+             method="GET",
          )

      def dataset_export_status(
          self, name: str, version: int
      ) -> Response[DatasetExportStatus]:
          return self._send_request(
-             "datachain/dataset-export-status",
+             "datachain/datasets/export-status",
              {"dataset_name": name, "dataset_version": version},
+             method="GET",
          )

      def upload_file(self, file_name: str, content: bytes) -> Response[FileUploadData]:
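
Both request helpers now take an HTTP method and move the payload into query parameters for GET requests while keeping a JSON body for everything else. The dispatch can be sketched in isolation with plain requests; the URL, token, and team name below are placeholders:

```py
import requests


def send(url: str, token: str, data: dict, team: str, method: str = "POST"):
    # GET requests carry the payload as query params; other methods send a JSON body.
    kwargs = (
        {"params": {**data, "team_name": team}}
        if method == "GET"
        else {"json": {**data, "team_name": team}}
    )
    return requests.request(
        method=method,
        url=url,
        headers={"Authorization": f"token {token}"},
        timeout=30,
        **kwargs,
    )


# e.g. send("https://studio.example.com/api/datachain/datasets", "TOKEN", {}, "my-team", method="GET")
```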
datachain/sql/functions/array.py CHANGED
@@ -38,6 +38,10 @@ class length(GenericFunction):  # noqa: N801


  class sip_hash_64(GenericFunction):  # noqa: N801
+     """
+     Computes the SipHash-64 hash of the array.
+     """
+
      type = Int64()
      package = "hash"
      name = "sip_hash_64"
datachain/sql/functions/numeric.py ADDED
@@ -0,0 +1,43 @@
+ from sqlalchemy.sql.functions import GenericFunction, ReturnTypeFromArgs
+
+ from datachain.sql.types import Int64
+ from datachain.sql.utils import compiler_not_implemented
+
+
+ class bit_and(ReturnTypeFromArgs):  # noqa: N801
+     inherit_cache = True
+
+
+ class bit_or(ReturnTypeFromArgs):  # noqa: N801
+     inherit_cache = True
+
+
+ class bit_xor(ReturnTypeFromArgs):  # noqa: N801
+     inherit_cache = True
+
+
+ class bit_rshift(ReturnTypeFromArgs):  # noqa: N801
+     inherit_cache = True
+
+
+ class bit_lshift(ReturnTypeFromArgs):  # noqa: N801
+     inherit_cache = True
+
+
+ class int_hash_64(GenericFunction):  # noqa: N801
+     """
+     Computes the 64-bit hash of an integer.
+     """
+
+     type = Int64()
+     package = "hash"
+     name = "int_hash_64"
+     inherit_cache = True
+
+
+ compiler_not_implemented(bit_and)
+ compiler_not_implemented(bit_or)
+ compiler_not_implemented(bit_xor)
+ compiler_not_implemented(bit_rshift)
+ compiler_not_implemented(bit_lshift)
+ compiler_not_implemented(int_hash_64)
datachain/sql/sqlite/base.py CHANGED
@@ -15,7 +15,14 @@ from sqlalchemy.sql.elements import literal
  from sqlalchemy.sql.expression import case
  from sqlalchemy.sql.functions import func

- from datachain.sql.functions import aggregate, array, conditional, random, string
+ from datachain.sql.functions import (
+     aggregate,
+     array,
+     conditional,
+     numeric,
+     random,
+     string,
+ )
  from datachain.sql.functions import path as sql_path
  from datachain.sql.selectable import Values, base_values_compiler
  from datachain.sql.sqlite.types import (
@@ -47,6 +54,8 @@ slash = literal("/")
  empty_str = literal("")
  dot = literal(".")

+ MAX_INT64 = 2**64 - 1
+

  def setup():
      global setup_is_complete  # noqa: PLW0603
@@ -89,6 +98,12 @@ def setup():
      compiles(aggregate.group_concat, "sqlite")(compile_group_concat)
      compiles(aggregate.any_value, "sqlite")(compile_any_value)
      compiles(aggregate.collect, "sqlite")(compile_collect)
+     compiles(numeric.bit_and, "sqlite")(compile_bitwise_and)
+     compiles(numeric.bit_or, "sqlite")(compile_bitwise_or)
+     compiles(numeric.bit_xor, "sqlite")(compile_bitwise_xor)
+     compiles(numeric.bit_rshift, "sqlite")(compile_bitwise_rshift)
+     compiles(numeric.bit_lshift, "sqlite")(compile_bitwise_lshift)
+     compiles(numeric.int_hash_64, "sqlite")(compile_int_hash_64)

      if load_usearch_extension(sqlite3.connect(":memory:")):
          compiles(array.cosine_distance, "sqlite")(compile_cosine_distance_ext)
@@ -163,6 +178,19 @@ def sqlite_string_split(string: str, sep: str, maxsplit: int = -1) -> str:
      return orjson.dumps(string.split(sep, maxsplit)).decode("utf-8")


+ def sqlite_int_hash_64(x: int) -> int:
+     """IntHash64 implementation from ClickHouse."""
+     x ^= 0x4CF2D2BAAE6DA887
+     x ^= x >> 33
+     x = (x * 0xFF51AFD7ED558CCD) & MAX_INT64
+     x ^= x >> 33
+     x = (x * 0xC4CEB9FE1A85EC53) & MAX_INT64
+     x ^= x >> 33
+     # SQLite does not support unsigned 64-bit integers,
+     # so we need to convert to signed 64-bit
+     return x if x < 1 << 63 else (x & MAX_INT64) - (1 << 64)
+
+
  def register_user_defined_sql_functions() -> None:
      # Register optional functions if we have the necessary dependencies
      # and otherwise register functions that will raise an exception with
@@ -185,6 +213,21 @@ def register_user_defined_sql_functions() -> None:

      _registered_function_creators["vector_functions"] = create_vector_functions

+     def create_numeric_functions(conn):
+         conn.create_function("divide", 2, lambda a, b: a / b, deterministic=True)
+         conn.create_function("bitwise_and", 2, lambda a, b: a & b, deterministic=True)
+         conn.create_function("bitwise_or", 2, lambda a, b: a | b, deterministic=True)
+         conn.create_function("bitwise_xor", 2, lambda a, b: a ^ b, deterministic=True)
+         conn.create_function(
+             "bitwise_rshift", 2, lambda a, b: a >> b, deterministic=True
+         )
+         conn.create_function(
+             "bitwise_lshift", 2, lambda a, b: a << b, deterministic=True
+         )
+         conn.create_function("int_hash_64", 1, sqlite_int_hash_64, deterministic=True)
+
+     _registered_function_creators["numeric_functions"] = create_numeric_functions
+
      def sqlite_regexp_replace(string: str, pattern: str, replacement: str) -> str:
          return re.sub(pattern, replacement, string)

@@ -316,6 +359,30 @@ def compile_euclidean_distance(element, compiler, **kwargs):
      return f"euclidean_distance({compiler.process(element.clauses, **kwargs)})"


+ def compile_bitwise_and(element, compiler, **kwargs):
+     return compiler.process(func.bitwise_and(*element.clauses.clauses), **kwargs)
+
+
+ def compile_bitwise_or(element, compiler, **kwargs):
+     return compiler.process(func.bitwise_or(*element.clauses.clauses), **kwargs)
+
+
+ def compile_bitwise_xor(element, compiler, **kwargs):
+     return compiler.process(func.bitwise_xor(*element.clauses.clauses), **kwargs)
+
+
+ def compile_bitwise_rshift(element, compiler, **kwargs):
+     return compiler.process(func.bitwise_rshift(*element.clauses.clauses), **kwargs)
+
+
+ def compile_bitwise_lshift(element, compiler, **kwargs):
+     return compiler.process(func.bitwise_lshift(*element.clauses.clauses), **kwargs)
+
+
+ def compile_int_hash_64(element, compiler, **kwargs):
+     return compiler.process(func.int_hash_64(*element.clauses.clauses), **kwargs)
+
+
  def py_json_array_length(arr):
      return len(orjson.loads(arr))

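On SQLite these operations are plain Python UDFs registered on each connection; the compile_* hooks above simply route the SQLAlchemy generic functions to those UDF names. A standalone sketch of what create_numeric_functions() registers, run directly against an in-memory connection:

```py
import sqlite3

conn = sqlite3.connect(":memory:")
# Mirrors two of the registrations from create_numeric_functions() above.
conn.create_function("bitwise_xor", 2, lambda a, b: a ^ b, deterministic=True)
conn.create_function("divide", 2, lambda a, b: a / b, deterministic=True)

print(conn.execute("SELECT bitwise_xor(12, 10), divide(7, 2)").fetchone())
# (6, 3.5)
```
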
datachain/studio.py CHANGED
@@ -155,7 +155,7 @@ def edit_studio_dataset(
      if not response.ok:
          raise_remote_error(response.message)

-     print(f"Dataset {name} updated")
+     print(f"Dataset '{name}' updated in Studio")


  def remove_studio_dataset(
@@ -169,7 +169,7 @@ def remove_studio_dataset(
      if not response.ok:
          raise_remote_error(response.message)

-     print(f"Dataset {name} removed")
+     print(f"Dataset '{name}' removed from Studio")


  def save_config(hostname, token):
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: datachain
- Version: 0.7.7
+ Version: 0.7.9
  Summary: Wrangle unstructured AI data at scale
  Author-email: Dmitry Petrov <support@dvc.org>
  License: Apache-2.0
@@ -98,7 +98,7 @@ Requires-Dist: unstructured[embed-huggingface,pdf]<0.16.0; extra == "examples"
  Requires-Dist: pdfplumber==0.11.4; extra == "examples"
  Requires-Dist: huggingface_hub[hf_transfer]; extra == "examples"
  Requires-Dist: onnx==1.16.1; extra == "examples"
- Requires-Dist: ultralytics==8.3.29; extra == "examples"
+ Requires-Dist: ultralytics==8.3.37; extra == "examples"

  ================
  |logo| DataChain
@@ -2,7 +2,7 @@ datachain/__init__.py,sha256=ofPJ6B-d-ybSDRrE7J6wqF_ZRAB2W9U8l-eeuBtqPLg,865
  datachain/__main__.py,sha256=hG3Y4ARGEqe1AWwNMd259rBlqtphx1Wk39YbueQ0yV8,91
  datachain/asyn.py,sha256=5aKrjnUxk0mtnZeFKNJd1DCE0MsnSoyJBZkr0y9H_a0,9313
  datachain/cache.py,sha256=s0YHN7qurmQv-eC265TjeureK84TebWWAnL07cxchZQ,2997
- datachain/cli.py,sha256=Ysm-6Kb-54FfkN35VJIe5vW7Kik8VGA3wcyCUnqPBHg,42245
+ datachain/cli.py,sha256=wQiYQ_qSVCGvS06pkknT9_FIBdFRzBdeRusW9uXE3vQ,42505
  datachain/cli_utils.py,sha256=jrn9ejGXjybeO1ur3fjdSiAyCHZrX0qsLLbJzN9ErPM,2418
  datachain/config.py,sha256=g8qbNV0vW2VEKpX-dGZ9pAn0DAz6G2ZFcr7SAV3PoSM,4272
  datachain/dataset.py,sha256=P-pDBgvPqJGDhq_I7fwCfb6hY8E8mIAO8Q0NT7SNlNE,19128
@@ -14,7 +14,7 @@ datachain/nodes_fetcher.py,sha256=ILMzUW5o4_6lUOVrLDC9gJPCXfcgKnMG68plrc7dAOA,11
  datachain/nodes_thread_pool.py,sha256=uPo-xl8zG5m9YgODjPFBpbcqqHjI-dcxH87yAbj_qco,3192
  datachain/progress.py,sha256=5KotcvvzAUL_RF0GEj4JY0IB1lyImnmHxe89YkT1XO4,4330
  datachain/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- datachain/studio.py,sha256=MthVADn-jM2I5TlESOfbzFnKGZjpuk9bM8m2vqOK-C8,7227
+ datachain/studio.py,sha256=Hr0Ha0kou0so4i8i-gWiXC1AYlJ2arI1D55cc7mi3tg,7253
  datachain/telemetry.py,sha256=0A4IOPPp9VlP5pyW9eBfaTK3YhHGzHl7dQudQjUAx9A,994
  datachain/utils.py,sha256=-mSFowjIidJ4_sMXInvNHLn4rK_QnHuIlLuH1_lMGmI,13897
  datachain/catalog/__init__.py,sha256=g2iAAFx_gEIrqshXlhSEbrc8qDaEH11cjU40n3CHDz4,409
@@ -32,17 +32,18 @@ datachain/client/s3.py,sha256=CVHBUZ1Ic2Q3370nl-Bbe69phuWjFlrVv9dTJKBpRT0,6019
  datachain/data_storage/__init__.py,sha256=9Wit-oe5P46V7CJQTD0BJ5MhOa2Y9h3ddJ4VWTe-Lec,273
  datachain/data_storage/db_engine.py,sha256=81Ol1of9TTTzD97ORajCnP366Xz2mEJt6C-kTUCaru4,3406
  datachain/data_storage/job.py,sha256=w-7spowjkOa1P5fUVtJou3OltT0L48P0RYWZ9rSJ9-s,383
- datachain/data_storage/metastore.py,sha256=EzSsfR_l_84i1AewYygpdsJyzGqEmvXjpeohlYF7h4A,37435
+ datachain/data_storage/metastore.py,sha256=hfTITcesE9XlUTxcCcdDyWGGep-QSjJL9DUxko5QCeI,37524
  datachain/data_storage/schema.py,sha256=-QVlRvD0dfu-ZFUxylEoSnLJLnleMEjVlcAb2OGu-AY,9895
  datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2krunWUum5o,927
  datachain/data_storage/sqlite.py,sha256=D_ZQ0PHmZzHO2dinv4naVJocUDIZUwV4WAz692C1cyk,22521
  datachain/data_storage/warehouse.py,sha256=tjIkU-5JywBR0apCyqTcwSyaRtGxhu2L7IVjrz-55uc,30802
- datachain/func/__init__.py,sha256=VAN7N2-eCHgidMCFI-fJTkCwdI1U_NIuCOgYc4sfYUQ,812
+ datachain/func/__init__.py,sha256=oz-GbCcp5jnN82u6cghWTGzmU9IQvtvllOof73wE52g,934
  datachain/func/aggregate.py,sha256=7_IPrIwb2XSs3zG4iOr1eTvzn6kNVe2mkzvNzjusDHk,10942
  datachain/func/array.py,sha256=zHDNWuWLA7HVa9FEvQeHhVi00_xqenyleTqcLwkXWBI,5477
  datachain/func/base.py,sha256=wA0sBQAVyN9LPxoo7Ox83peS0zUVnyuKxukwAcjGLfY,534
  datachain/func/conditional.py,sha256=mQroxsoExpBW84Zm5dAYP4OpBblWmzfnF2qJq9rba54,2223
- datachain/func/func.py,sha256=GykhTvNbACFSwaSXsgVlDnqR48kpP_GNAxm3bcq1RYg,12560
+ datachain/func/func.py,sha256=mJ_rOXMpoqnK4-d5eF9boSMx5hWzgKoMLPGpZQqLAfw,15222
+ datachain/func/numeric.py,sha256=GcUX6ijZvzfac8CZrHE0gRc9WCPiutcMLKqNXtbn-Yo,4186
  datachain/func/path.py,sha256=mqN_mfkwv44z2II7DMTp_fGGw95hmTCNls_TOFNpr4k,3155
  datachain/func/random.py,sha256=pENOLj9rSmWfGCnOsUIaCsVC5486zQb66qfQvXaz9Z4,452
  datachain/func/string.py,sha256=NQzaXXYu7yb72HPADy4WrFlcgvTS77L9x7-qvCKJtnk,4522
@@ -52,7 +53,7 @@ datachain/lib/arrow.py,sha256=b5efxAUaNNYVwtXVJqj07D3zf5KC-BPlLCxKEZbEG6w,9429
  datachain/lib/clip.py,sha256=lm5CzVi4Cj1jVLEKvERKArb-egb9j1Ls-fwTItT6vlI,6150
  datachain/lib/data_model.py,sha256=zS4lmXHVBXc9ntcyea2a1CRLXGSAN_0glXcF88CohgY,2685
  datachain/lib/dataset_info.py,sha256=IjdF1E0TQNOq9YyynfWiCFTeZpbyGfyJvxgJY4YN810,2493
- datachain/lib/dc.py,sha256=t5y5tsYyU7uuk3gEPPhhcDSZ1tL1aHkKG2W54eHiUq8,89492
+ datachain/lib/dc.py,sha256=xqLR4IH_mbuet0FsxBHDsRUg-zR6tO8UZdLQQTLG8EE,89533
  datachain/lib/file.py,sha256=-XMkL6ED1sE7TMhWoMRTEuOXswZJw8X6AEmJDONFP74,15019
  datachain/lib/hf.py,sha256=a-zFpDmZIR4r8dlNNTjfpAKSnuJ9xyRXlgcdENiXt3E,5864
  datachain/lib/image.py,sha256=AMXYwQsmarZjRbPCZY3M1jDsM2WAB_b3cTY4uOIuXNU,2675
@@ -60,7 +61,7 @@ datachain/lib/listing.py,sha256=cVkCp7TRVpcZKSx-Bbk9t51bQI9Mw0o86W6ZPhAsuzM,3667
  datachain/lib/listing_info.py,sha256=9ua40Hw0aiQByUw3oAEeNzMavJYfW0Uhe8YdCTK-m_g,1110
  datachain/lib/meta_formats.py,sha256=anK2bDVbaeCCh0yvKUBaW2MVos3zRgdaSV8uSduzPcU,6680
  datachain/lib/model_store.py,sha256=DNIv8Y6Jtk1_idNLzIpsThOsdW2BMAudyUCbPUcgcxk,2515
- datachain/lib/pytorch.py,sha256=Nh6fUbQMLX8OpZvX4tw4bJjTCQpRKi0jSLgkJnLHdTM,5880
+ datachain/lib/pytorch.py,sha256=QMJO_OGEMvBi2x71vGcG25agLzNwyLmF4Qx5iILlwaM,6350
  datachain/lib/settings.py,sha256=ZELRCTLbi5vzRPiDX6cQ9LLg9TefJ_A05gIGni0lll8,2535
  datachain/lib/signal_schema.py,sha256=_uh19nCKhiD9ua8oIN1Q8R9iYv1BZAuqTJCLYVmyW8k,24557
  datachain/lib/tar.py,sha256=3WIzao6yD5fbLqXLTt9GhPGNonbFIs_fDRu-9vgLgsA,1038
@@ -95,7 +96,7 @@ datachain/query/queue.py,sha256=waqM_KzavU8C-G95-4211Nd4GXna_u2747Chgwtgz2w,3839
  datachain/query/schema.py,sha256=b_KnVy6B26Ol4nYG0LqNNpeQ1QYPk95YRGUjXfdaQWs,6606
  datachain/query/session.py,sha256=vvLIJ5b8eElovHLAWq_CZJXmN5t7C7iAZA7x9wPPOms,5905
  datachain/remote/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- datachain/remote/studio.py,sha256=jp6NWo7OPUxqO8uYEHP0_XFlmj47rMxC80qKQ7rA3Xk,11024
+ datachain/remote/studio.py,sha256=WiK6fpRAw0a6Dth4XXI0YInEHH4gDU7AUHHDNd3wJzg,11616
  datachain/sql/__init__.py,sha256=6SQRdbljO3d2hx3EAVXEZrHQKv5jth0Jh98PogT59No,262
  datachain/sql/selectable.py,sha256=cTc60qVoAwqqss0Vop8Lt5Z-ROnM1XrQmL_GLjRxhXs,1765
  datachain/sql/types.py,sha256=ASSPkmM5EzdRindqj2O7WHLXq8VHAgFYedG8lYfGvVI,14045
@@ -104,21 +105,22 @@ datachain/sql/default/__init__.py,sha256=XQ2cEZpzWiABqjV-6yYHUBGI9vN_UHxbxZENESm
  datachain/sql/default/base.py,sha256=QD-31C6JnyOXzogyDx90sUhm7QvgXIYpeHEASH84igU,628
  datachain/sql/functions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datachain/sql/functions/aggregate.py,sha256=3AQdA8YHPFdtCEfwZKQXTT8SlQWdG9gD5PBtGN3Odqs,944
- datachain/sql/functions/array.py,sha256=rvH27SWN9gdh_mFnp0GIiXuCrNW6n8ZbY4I_JUS-_e0,1140
+ datachain/sql/functions/array.py,sha256=Zq59CaMHf_hFapU4kxvy2mwteH344k5Wksxja4MfBks,1204
  datachain/sql/functions/conditional.py,sha256=q7YUKfunXeEldXaxgT-p5pUTcOEVU_tcQ2BJlquTRPs,207
+ datachain/sql/functions/numeric.py,sha256=DFTTEWsvBBXwbaaC4zdxhAoqUYwI6nbymG-nzbzdPv8,972
  datachain/sql/functions/path.py,sha256=zixpERotTFP6LZ7I4TiGtyRA8kXOoZmH1yzH9oRW0mg,1294
  datachain/sql/functions/random.py,sha256=vBwEEj98VH4LjWixUCygQ5Bz1mv1nohsCG0-ZTELlVg,271
  datachain/sql/functions/string.py,sha256=DYgiw8XSk7ge7GXvyRI1zbaMruIizNeI-puOjriQGZQ,1148
  datachain/sql/sqlite/__init__.py,sha256=TAdJX0Bg28XdqPO-QwUVKy8rg78cgMileHvMNot7d04,166
- datachain/sql/sqlite/base.py,sha256=X4iEynOAqqvqz8lmgUKvURleKO6aguULgG8RoufKrSk,14772
+ datachain/sql/sqlite/base.py,sha256=eQv2U32jChG9tnYSFE4SS2Mvfb7-W3Ok3Ffhew9qkKI,17254
  datachain/sql/sqlite/types.py,sha256=lPXS1XbkmUtlkkiRxy_A_UzsgpPv2VSkXYOD4zIHM4w,1734
  datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR0,469
  datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
  datachain/toolkit/split.py,sha256=ZgDcrNiKiPXZmKD591_1z9qRIXitu5zwAsoVPB7ykiU,2508
  datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
- datachain-0.7.7.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
- datachain-0.7.7.dist-info/METADATA,sha256=laxYaz9f-PIJ30f3krSjRu45CjyfbnBM8Q4kddXa9dM,18006
- datachain-0.7.7.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
- datachain-0.7.7.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
- datachain-0.7.7.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
- datachain-0.7.7.dist-info/RECORD,,
+ datachain-0.7.9.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
+ datachain-0.7.9.dist-info/METADATA,sha256=iu58cwfGQVYTwn53symALXVpe9292EWXdOly2MWuPZY,18006
+ datachain-0.7.9.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
+ datachain-0.7.9.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
+ datachain-0.7.9.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
+ datachain-0.7.9.dist-info/RECORD,,