datachain 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of datachain might be problematic.
datachain/__init__.py CHANGED
@@ -1,4 +0,0 @@
-try:
-    from ._version import version as __version__
-except ImportError:
-    __version__ = "UNKNOWN"
datachain/cli.py CHANGED
@@ -5,13 +5,14 @@ import sys
 import traceback
 from argparse import SUPPRESS, Action, ArgumentParser, ArgumentTypeError, Namespace
 from collections.abc import Iterable, Iterator, Mapping, Sequence
+from importlib.metadata import PackageNotFoundError, version
 from itertools import chain
 from multiprocessing import freeze_support
 from typing import TYPE_CHECKING, Optional, Union
 
 import shtab
 
-from datachain import __version__, utils
+from datachain import utils
 from datachain.cli_utils import BooleanOptionalAction, CommaSeparatedArgs, KeyValueArgs
 from datachain.utils import DataChainDir
 
@@ -96,6 +97,12 @@ def add_show_args(parser: ArgumentParser) -> None:
 
 
 def get_parser() -> ArgumentParser:  # noqa: PLR0915
+    try:
+        __version__ = version("datachain")
+    except PackageNotFoundError:
+        # package is not installed
+        __version__ = "unknown"
+
     parser = ArgumentParser(
         description="DataChain: Wrangle unstructured AI data at scale", prog="datachain"
     )
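Note on the change above: the CLI now resolves its version at runtime through importlib.metadata instead of importing __version__ from the package (whose _version.py is deleted in this release). A minimal sketch of the same lookup pattern, runnable on Python 3.8+:

    from importlib.metadata import PackageNotFoundError, version

    try:
        pkg_version = version("datachain")  # distribution name, not module name
    except PackageNotFoundError:
        # e.g. running from a source tree that was never pip-installed
        pkg_version = "unknown"

    print(pkg_version)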
datachain/data_storage/schema.py CHANGED
@@ -31,7 +31,7 @@ def dedup_columns(columns: Iterable[sa.Column]) -> list[sa.Column]:
     """
     c_set: dict[str, sa.Column] = {}
     for c in columns:
-        if ec := c_set.get(c.name, None):
+        if (ec := c_set.get(c.name, None)) is not None:
             if str(ec.type) != str(c.type):
                 raise ValueError(
                     f"conflicting types for column {c.name}:{c.type!s} and {ec.type!s}"
@@ -171,8 +171,8 @@ class DataTable:
     ):
         # copy columns, since re-using the same objects from another table
         # may raise an error
-        columns = [cls.copy_column(c) for c in columns if c.name != "id"]
-        columns = [sa.Column("id", Int, primary_key=True), *columns]
+        columns = cls.sys_columns() + [cls.copy_column(c) for c in columns]
+        columns = dedup_columns(columns)
 
         if metadata is None:
             metadata = sa.MetaData()
@@ -230,11 +230,17 @@ class DataTable:
     def delete(self):
         return self.apply_conditions(self.table.delete())
 
+    @staticmethod
+    def sys_columns():
+        return [
+            sa.Column("id", Int, primary_key=True),
+            sa.Column("random", Int64, nullable=False, default=f.random()),
+        ]
+
     @classmethod
     def file_columns(cls) -> list[sa.Column]:
         return [
-            sa.Column("id", Int, primary_key=True),
-            sa.Column("random", Int64, nullable=False),
+            *cls.sys_columns(),
             sa.Column("vtype", String, nullable=False, index=True),
             sa.Column("dir_type", Int, index=True),
             sa.Column("parent", String, index=True),
datachain/data_storage/sqlite.py CHANGED
@@ -33,6 +33,7 @@ from datachain.data_storage.schema import (
 from datachain.dataset import DatasetRecord
 from datachain.error import DataChainError
 from datachain.sql.sqlite import create_user_defined_sql_functions, sqlite_dialect
+from datachain.sql.sqlite.base import load_usearch_extension
 from datachain.sql.types import SQLType
 from datachain.storage import StorageURI
 from datachain.utils import DataChainDir
@@ -114,6 +115,8 @@ class SQLiteDatabaseEngine(DatabaseEngine):
             if os.environ.get("DEBUG_SHOW_SQL_QUERIES"):
                 db.set_trace_callback(print)
 
+            load_usearch_extension(db)
+
             return cls(engine, MetaData(), db, db_file)
         except RuntimeError:
             raise DataChainError("Can't connect to SQLite DB") from None
datachain/lib/cached_stream.py CHANGED
@@ -1,6 +1,3 @@
-import os
-import shutil
-import tempfile
 from abc import ABC
 from contextlib import AbstractContextManager
 
@@ -8,9 +5,7 @@ from datachain.cache import UniqueId
 
 
 class AbstractCachedStream(AbstractContextManager, ABC):
-    def __init__(self, stream, size, catalog, uid: UniqueId):
-        self.stream = stream
-        self.size = size
+    def __init__(self, catalog, uid: UniqueId):
         self.catalog = catalog
         self.uid = uid
         self.mode = "rb"
@@ -19,86 +14,9 @@ class AbstractCachedStream(AbstractContextManager, ABC):
         self.mode = mode
 
 
-class ProgressiveCacheStream(AbstractCachedStream):
-    BUF_SIZE = 4096
-
-    def __init__(self, stream, size, catalog, uid: UniqueId):
-        super().__init__(stream, size, catalog, uid)
-
-        self.target_path = self.catalog.cache.path_from_checksum(self.uid.get_hash())
-        self.cached_file = None
-
-        self.temp_file = None
-        self.temp_file_pos = 0
-
-    def __enter__(self):
-        if os.path.exists(self.target_path):
-            self.cached_file = open(self.target_path, mode=self.mode)
-            return self.cached_file
-
-        tmp_dir = self.catalog.cache.tmp_dir
-        if not os.path.exists(tmp_dir):
-            os.makedirs(tmp_dir)
-        self.temp_file = tempfile.NamedTemporaryFile(
-            prefix=str(self.uid.get_hash()), dir=tmp_dir, delete=False
-        )
-        return self
-
-    def __exit__(self, *args):
-        self.close()
-
-    def read(self, size=-1):
-        buf = self.stream.read(size)
-        pos = self.stream.tell()
-
-        if pos >= self.temp_file_pos:
-            self._cache_catch_up(pos, buf)
-
-        return buf
-
-    def close(self):
-        if self.cached_file:
-            self.cached_file.close()
-
-        if self.temp_file:
-            if self.temp_file_pos < self.size:
-                self._cache_catch_up(self.size)
-
-            self.temp_file.close()
-            if not os.path.exists(self.target_path):
-                os.makedirs(os.path.dirname(self.target_path), exist_ok=True)
-                shutil.move(self.temp_file.name, self.target_path)
-
-        self.stream.close()
-
-    def _cache_catch_up(self, pos_target, latest_buf=None):
-        pos_to_restore = self.stream.tell()
-        try:
-            remainder = pos_target - self.temp_file_pos
-            self.stream.seek(self.temp_file_pos)
-            while remainder > 0:
-                chunk_size = min(self.BUF_SIZE, remainder)
-                buf = self.stream.read(chunk_size)
-                self._cache_update(buf)
-                remainder -= len(buf)
-        finally:
-            self.stream.seek(pos_to_restore)
-
-    def _cache_update(self, buf):
-        length = len(buf)
-        self.temp_file.write(buf)
-        self.temp_file_pos += length
-
-    def seek(self, offset, whence=0):
-        return self.stream.seek(offset, whence)
-
-    def tell(self):
-        return self.stream.tell()
-
-
 class PreCachedStream(AbstractCachedStream):
-    def __init__(self, stream, size, catalog, uid: UniqueId):
-        super().__init__(stream, size, catalog, uid)
+    def __init__(self, catalog, uid: UniqueId):
+        super().__init__(catalog, uid)
         self.client = self.catalog.get_client(self.uid.storage)
         self.cached_file = None
 
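With ProgressiveCacheStream removed, cached streams no longer wrap an externally supplied file object and size: the remaining classes are built from the catalog plus a UniqueId and fetch data through the catalog's client themselves. A rough usage sketch under that assumption (the enter/exit behavior is not shown in this diff, so treat this as illustrative only):

    # hypothetical: `catalog` and `uid` obtained elsewhere, e.g. uid = file.get_uid()
    stream = PreCachedStream(catalog, uid)
    stream.set_mode("r")   # text mode, as TextFile._set_stream does
    with stream as f:      # AbstractContextManager protocol
        data = f.read()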
datachain/lib/dc.py CHANGED
@@ -39,6 +39,8 @@ if TYPE_CHECKING:
     import pandas as pd
     from typing_extensions import Self
 
+    from datachain.catalog import Catalog
+
 C = Column
 
@@ -200,10 +202,12 @@ class DataChain(DatasetQuery):
     def from_storage(
         cls,
         path,
+        *,
         type: Literal["binary", "text", "image"] = "binary",
+        catalog: Optional["Catalog"] = None,
         recursive: Optional[bool] = True,
         anon: bool = False,
-    ) -> "DataChain":
+    ) -> "Self":
         """Get data from a storage as a list of file with all file attributes. It
         returns the chain itself as usual.
 
@@ -220,7 +224,7 @@
         ```
         """
         func = get_file(type)
-        return DataChain(path, recursive=recursive, anon=anon).map(file=func)
+        return cls(path, catalog=catalog, recursive=recursive, anon=anon).map(file=func)
 
     @classmethod
     def from_dataset(cls, name: str, version: Optional[int] = None) -> "DataChain":
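Everything after path is now keyword-only, so call sites must spell the options out; a short usage sketch (the bucket URI is illustrative):

    from datachain.lib.dc import DataChain

    chain = DataChain.from_storage("s3://my-bucket/images/", type="image", anon=True)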
datachain/lib/feature.py CHANGED
@@ -7,6 +7,7 @@ from datetime import datetime
 from functools import lru_cache
 from types import GenericAlias
 from typing import (
+    TYPE_CHECKING,
     Any,
     ClassVar,
     Literal,
@@ -39,6 +40,9 @@ from datachain.sql.types import (
     String,
 )
 
+if TYPE_CHECKING:
+    from datachain.catalog import Catalog
+
 FeatureStandardType = Union[
     type[int],
     type[str],
@@ -158,7 +162,7 @@ class Feature(BaseModel):
         s1 = re.sub("(.)([A-Z][a-z]+)", r"\1_\2", name)
         return re.sub("([a-z0-9])([A-Z])", r"\1_\2", s1).lower()
 
-    def _set_stream(self, catalog, stream=None, caching_enabled: bool = False) -> None:
+    def _set_stream(self, catalog: "Catalog", caching_enabled: bool = False) -> None:
         pass
 
     @classmethod
datachain/lib/feature_registry.py CHANGED
@@ -1,6 +1,7 @@
+import logging
 from typing import Any, ClassVar, Optional
 
-from datachain.cli import logger
+logger = logging.getLogger(__name__)
 
 
 class Registry:
@@ -16,7 +17,7 @@ class Registry:
         version = fr._version  # type: ignore[attr-defined]
         if version in cls.reg[name]:
             full_name = f"{name}@{version}"
-            logger.warning(f"Feature {full_name} is already registered")
+            logger.warning("Feature %s is already registered", full_name)
         cls.reg[name][version] = fr
 
     @classmethod
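Besides dropping the import from datachain.cli, the warning now uses %-style arguments, which defers string formatting until the record is actually emitted. A minimal illustration of the difference:

    import logging

    logger = logging.getLogger(__name__)
    full_name = "MyFeature@2"

    logger.warning(f"Feature {full_name} is already registered")   # formats eagerly
    logger.warning("Feature %s is already registered", full_name)  # formats only if emitted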
datachain/lib/file.py CHANGED
@@ -2,11 +2,10 @@ import json
 from abc import ABC, abstractmethod
 from datetime import datetime
 from pathlib import Path
-from typing import Any, ClassVar, Literal, Optional, Union
+from typing import TYPE_CHECKING, Any, ClassVar, Literal, Optional, Union
 from urllib.parse import unquote, urlparse
 from urllib.request import url2pathname
 
-from fsspec import Callback
 from fsspec.implementations.local import LocalFileSystem
 from pydantic import Field, field_validator
 
@@ -18,6 +17,9 @@ from datachain.lib.utils import DataChainError
 from datachain.sql.types import JSON, Int, String
 from datachain.utils import TIME_ZERO
 
+if TYPE_CHECKING:
+    from datachain.catalog import Catalog
+
 
 class FileFeature(Feature):
     _is_file = True
@@ -182,26 +184,17 @@ class File(FileFeature):
 
     def open(self):
         if self._stream is None:
-            if self._catalog is None:
-                raise FileError(self, "stream is not set")
-            self._stream = self._open_stream()
+            raise FileError(self, "stream is not set")
 
         if self.location:
             return VFileRegistry.resolve(self, self.location)
 
         return self._stream
 
-    def _set_stream(
-        self, catalog=None, stream=None, caching_enabled: bool = False
-    ) -> None:
-        if self._catalog is None and catalog is None:
-            raise DataChainError(f"Cannot set file '{stream}' without catalog")
-
-        if catalog:
-            self._catalog = catalog
-
+    def _set_stream(self, catalog: "Catalog", caching_enabled: bool = False) -> None:
+        self._catalog = catalog
         stream_class = PreCachedStream if caching_enabled else PreDownloadStream
-        self._stream = stream_class(stream, self.size, self._catalog, self.get_uid())
+        self._stream = stream_class(self._catalog, self.get_uid())
         self._caching_enabled = caching_enabled
 
     def get_uid(self) -> UniqueId:
@@ -232,11 +225,6 @@ class File(FileFeature):
     def get_uri(self):
         return f"{self.source}/{self.get_full_name()}"
 
-    def _open_stream(self, cache: bool = False, cb: Optional[Callback] = None):
-        client = self._catalog.get_client(self.source)
-        uid = self.get_uid()
-        return client.open_object(uid, use_cache=cache, cb=cb)
-
     def get_path(self) -> str:
         path = unquote(self.get_uri())
         fs = self.get_fs()
@@ -258,10 +246,8 @@ class TextFile(File):
         super().__init__(**kwargs)
         self._stream = None
 
-    def _set_stream(
-        self, catalog=None, stream=None, caching_enabled: bool = False
-    ) -> None:
-        super()._set_stream(catalog, stream, caching_enabled)
+    def _set_stream(self, catalog: "Catalog", caching_enabled: bool = False) -> None:
+        super()._set_stream(catalog, caching_enabled)
         self._stream.set_mode("r")
 
datachain/lib/udf.py CHANGED
@@ -6,10 +6,10 @@ from typing import TYPE_CHECKING, Callable, Optional
 from datachain.lib.feature import Feature
 from datachain.lib.signal_schema import SignalSchema
 from datachain.lib.utils import DataChainError, DataChainParamsError
-from datachain.query import Stream, udf
+from datachain.query import udf
 
 if TYPE_CHECKING:
-    from dvxc.query.udf import UDFWrapper
+    from datachain.query.udf import UDFWrapper
 
 
 class UdfError(DataChainParamsError):
@@ -34,11 +34,6 @@ class UDFBase:
 
         params_spec = params.to_udf_spec()
         self.params_spec = list(params_spec.keys())
-        self._contains_stream = False
-        if params.contains_file():
-            self.params_spec.insert(0, Stream())  # type: ignore[arg-type]
-            self._contains_stream = True
-
         self.output_spec = output.to_udf_spec()
 
         self._catalog = None
@@ -122,18 +117,10 @@ class UDFBase:
             rows = [rows]
         objs = []
         for row in rows:
-            if self._contains_stream:
-                stream, *row = row
-            else:
-                stream = None
-
             obj_row = self.params.row_to_objs(row)
-
-            if self._contains_stream:
-                for obj in obj_row:
-                    if isinstance(obj, Feature):
-                        obj._set_stream(self._catalog, stream, True)
-
+            for obj in obj_row:
+                if isinstance(obj, Feature):
+                    obj._set_stream(self._catalog, caching_enabled=True)
             objs.append(obj_row)
         return objs
 
@@ -150,13 +137,7 @@ class UDFBase:
             output_map[name] = []
 
         for flat_obj in group:
-            if self._contains_stream:
-                position = 1
-                stream = flat_obj[0]
-            else:
-                position = 0
-                stream = None
-
+            position = 0
             for signal, (cls, length) in spec_map.items():
                 slice = flat_obj[position : position + length]
                 position += length
@@ -167,7 +148,7 @@ class UDFBase:
                     obj = slice[0]
 
                 if isinstance(obj, Feature):
-                    obj._set_stream(self._catalog, stream)
+                    obj._set_stream(self._catalog)
                 output_map[signal].append(obj)
 
         return list(output_map.values())
datachain/query/dataset.py CHANGED
@@ -1737,22 +1737,16 @@ class DatasetQuery:
 
         # Exclude the id column and let the db create it to avoid unique
         # constraint violations.
-        cols = [col.name for col in dr.get_table().c if col.name != "id"]
-        assert cols
         q = query.exclude(("id",))
-
         if q._order_by_clauses:
             # ensuring we have id sorted by order by clause if it exists in a query
             q = q.add_columns(
                 f.row_number().over(order_by=q._order_by_clauses).label("id")
             )
-            cols.append("id")
-
-        self.catalog.warehouse.db.execute(
-            sqlalchemy.insert(dr.get_table()).from_select(cols, q),
-            **kwargs,
-        )
 
+        cols = tuple(c.name for c in q.columns)
+        insert_q = sqlalchemy.insert(dr.get_table()).from_select(cols, q)
+        self.catalog.warehouse.db.execute(insert_q, **kwargs)
         self.catalog.metastore.update_dataset_status(
             dataset, DatasetStatus.COMPLETE, version=version
         )
datachain/sql/sqlite/base.py CHANGED
@@ -71,8 +71,6 @@ def setup():
     compiles(sql_path.name, "sqlite")(compile_path_name)
     compiles(sql_path.file_stem, "sqlite")(compile_path_file_stem)
     compiles(sql_path.file_ext, "sqlite")(compile_path_file_ext)
-    compiles(array.cosine_distance, "sqlite")(compile_cosine_distance)
-    compiles(array.euclidean_distance, "sqlite")(compile_euclidean_distance)
     compiles(array.length, "sqlite")(compile_array_length)
     compiles(string.length, "sqlite")(compile_string_length)
     compiles(string.split, "sqlite")(compile_string_split)
@@ -81,6 +79,13 @@ def setup():
     compiles(Values, "sqlite")(compile_values)
     compiles(random.rand, "sqlite")(compile_rand)
 
+    if load_usearch_extension(sqlite3.connect(":memory:")):
+        compiles(array.cosine_distance, "sqlite")(compile_cosine_distance_ext)
+        compiles(array.euclidean_distance, "sqlite")(compile_euclidean_distance_ext)
+    else:
+        compiles(array.cosine_distance, "sqlite")(compile_cosine_distance)
+        compiles(array.euclidean_distance, "sqlite")(compile_euclidean_distance)
+
     register_user_defined_sql_functions()
     setup_is_complete = True
 
@@ -246,11 +251,23 @@ def compile_path_file_ext(element, compiler, **kwargs):
     return compiler.process(path_file_ext(*element.clauses.clauses), **kwargs)
 
 
+def compile_cosine_distance_ext(element, compiler, **kwargs):
+    run_compiler_hook("cosine_distance")
+    return f"distance_cosine_f32({compiler.process(element.clauses, **kwargs)})"
+
+
 def compile_cosine_distance(element, compiler, **kwargs):
     run_compiler_hook("cosine_distance")
     return f"cosine_distance({compiler.process(element.clauses, **kwargs)})"
 
 
+def compile_euclidean_distance_ext(element, compiler, **kwargs):
+    run_compiler_hook("euclidean_distance")
+    return (
+        f"sqrt(distance_sqeuclidean_f32({compiler.process(element.clauses, **kwargs)}))"
+    )
+
+
def compile_euclidean_distance(element, compiler, **kwargs):
    run_compiler_hook("euclidean_distance")
    return f"euclidean_distance({compiler.process(element.clauses, **kwargs)})"
@@ -330,3 +347,18 @@ def compile_values(element, compiler, **kwargs):
 
 def compile_rand(element, compiler, **kwargs):
     return compiler.process(func.random(), **kwargs)
+
+
+def load_usearch_extension(conn) -> bool:
+    try:
+        # usearch is part of the vector optional dependencies
+        # we use the extension's cosine and euclidean distance functions
+        from usearch import sqlite_path
+
+        conn.enable_load_extension(True)
+        conn.load_extension(sqlite_path())
+        conn.enable_load_extension(False)
+        return True
+
+    except Exception:  # noqa: BLE001
+        return False
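load_usearch_extension() makes the SIMD-backed distance functions opt-in: when the usearch wheel (the new vector extra) is importable, its bundled SQLite extension is loaded and the *_ext compilers above take over; otherwise the names fall back to the Python UDFs. A hedged probe sketch, assuming (as the compilers above imply) that the extension functions accept JSON-array strings:

    import sqlite3

    conn = sqlite3.connect(":memory:")
    if load_usearch_extension(conn):
        (dist,) = conn.execute(
            "SELECT distance_cosine_f32('[1.0, 0.0]', '[0.0, 1.0]')"
        ).fetchone()
        print(dist)  # ~1.0 for orthogonal vectors
    else:
        print("usearch not installed; Python fallbacks will be used")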
datachain/sql/sqlite/vector.py CHANGED
@@ -1,15 +1,23 @@
-import json
+import math
 
 import numpy as np
-from scipy.spatial import distance
 
 
 def euclidean_distance(a: str, b: str):
-    a_np = np.array(json.loads(a))
-    b_np = np.array(json.loads(b))
+    a_np = np.fromstring(a[1:-1], sep=",")
+    b_np = np.fromstring(b[1:-1], sep=",")
 
     return np.linalg.norm(b_np - a_np)
 
 
 def cosine_distance(a: str, b: str):
-    return distance.cosine(json.loads(a), json.loads(b))
+    u = np.fromstring(a[1:-1], sep=",")
+    v = np.fromstring(b[1:-1], sep=",")
+
+    uv = np.inner(u, v)
+    uu = np.inner(u, u)
+    vv = np.inner(v, v)
+
+    dist = 1.0 - uv / math.sqrt(uu * vv)
+
+    return max(0, min(dist, 2.0))
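This fallback drops the scipy dependency entirely: vectors arrive as bracketed comma-separated strings, a[1:-1] strips the brackets, np.fromstring parses the rest, and the cosine result 1 - u.v / sqrt((u.u)(v.v)) is clamped to [0, 2] to absorb floating-point drift. Expected behavior, for example:

    euclidean_distance("[0, 0]", "[3, 4]")  # 5.0
    cosine_distance("[1, 0]", "[0, 1]")     # 1.0 (orthogonal vectors)
    cosine_distance("[1, 2]", "[2, 4]")     # 0.0 (parallel vectors)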
datachain-0.2.0.dist-info/METADATA → datachain-0.2.1.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: datachain
-Version: 0.2.0
+Version: 0.2.1
 Summary: Wrangle unstructured AI data at scale
 Author-email: Dmitry Petrov <support@dvc.org>
 License: Apache-2.0
@@ -79,7 +79,7 @@ Requires-Dist: open-clip-torch ; extra == 'tests'
 Requires-Dist: aiotools >=1.7.0 ; extra == 'tests'
 Requires-Dist: requests-mock ; extra == 'tests'
 Provides-Extra: vector
-Requires-Dist: scipy ; extra == 'vector'
+Requires-Dist: usearch ; extra == 'vector'
 
 |PyPI| |Python Version| |Codecov| |Tests| |License|
 
datachain-0.2.0.dist-info/RECORD → datachain-0.2.1.dist-info/RECORD CHANGED
@@ -1,9 +1,8 @@
-datachain/__init__.py,sha256=9a0qX6tqyA9KC3ahLmGarqlRTZJXhM7HijAWpfUaOnQ,102
+datachain/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datachain/__main__.py,sha256=hG3Y4ARGEqe1AWwNMd259rBlqtphx1Wk39YbueQ0yV8,91
-datachain/_version.py,sha256=H-qsvrxCpdhaQzyddR-yajEqI71hPxLa4KxzpP3uS1g,411
 datachain/asyn.py,sha256=CKCFQJ0CbB3r04S7mUTXxriKzPnOvdUaVPXjM8vCtJw,7644
 datachain/cache.py,sha256=FaPWrqWznPffmskTb1pdPkt2jAMMf__9FC2zEnP0vDU,4022
-datachain/cli.py,sha256=FLKRimIq917Dq0EmG3yLzMTqDaMA0vyCRUREOobUspY,32256
+datachain/cli.py,sha256=lInqYMhk8YuPY-ZWkfWZmE-ZmdIChJgbs305-a_MWpo,32457
 datachain/cli_utils.py,sha256=jrn9ejGXjybeO1ur3fjdSiAyCHZrX0qsLLbJzN9ErPM,2418
 datachain/config.py,sha256=PfC7W5yO6HFO6-iMB4YB-0RR88LPiGmD6sS_SfVbGso,1979
 datachain/dataset.py,sha256=MZezyuJWNj_3PEtzr0epPMNyWAOTrhTSPI5FmemV6L4,14470
@@ -33,19 +32,19 @@ datachain/data_storage/db_engine.py,sha256=mxOoWP4ntBMgLeTAk4dlEeIJArAz4x_tFrHyt
 datachain/data_storage/id_generator.py,sha256=VlDALKijggegAnNMJwuMETJgnLoPYxpkrkld5DNTPQw,3839
 datachain/data_storage/job.py,sha256=w-7spowjkOa1P5fUVtJou3OltT0L48P0RYWZ9rSJ9-s,383
 datachain/data_storage/metastore.py,sha256=y-4fYvuOPnWeYxAvqhDnw6CdlTvQiurg0Gg4TaG9LR0,54074
-datachain/data_storage/schema.py,sha256=FrhmeZ_btT1CfVisa4ScabS11ixZ3xn3d_whvVsBtDA,8700
+datachain/data_storage/schema.py,sha256=t58LexPOCam_vWV0W52otEDNXgtFPHX3QFApEncFy2s,8809
 datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2krunWUum5o,927
-datachain/data_storage/sqlite.py,sha256=eHTiJ0VIxU-chnhKNTN14EsaSnw5LAaxTLi9aMCZpl4,24978
+datachain/data_storage/sqlite.py,sha256=F68Q_AIqNAObZ5kJ0GnBqRC6e2D2sRehkQo8UzrHgtI,25079
 datachain/data_storage/warehouse.py,sha256=tL2mYoXVZe-coKLTRXEJ0sMdEr2BD0GwgIWip5PP5CM,33300
 datachain/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datachain/lib/arrow.py,sha256=7lAas8hSh3vL7S7s2KOlkYn4viQpfVbM_FQ_hLCh5oc,2593
-datachain/lib/cached_stream.py,sha256=BQI6gpJ2y7_-jqQo_0VB9ntbkOVISvj9wlDwGDQbqw8,3537
+datachain/lib/cached_stream.py,sha256=t2ifK0hZVZiVn0MQ8D3FaFK1-qK84TwJW2Dw1SRsw9g,1066
 datachain/lib/claude.py,sha256=iAauA1zNVNONpLzUo1t0QN5PZ5Ot6cZkfib7Ka_c638,1969
-datachain/lib/dc.py,sha256=szYQC4FOoYDMlSEDAPWZ25z4Nn-WeoaKiqKwwXbOJws,35355
-datachain/lib/feature.py,sha256=KiPiMrU8ec-bJuUs70Xh4jytZdzKk9puQNQnx03K-po,12057
-datachain/lib/feature_registry.py,sha256=YQsLYChNkYK6p2MpcVfAyBybtfN5EMiOJ8LIYakjmeQ,1602
+datachain/lib/dc.py,sha256=PBbEZhSPnbvB6jh2eTgZyDSouAGbjgEv8xabW45_vmk,35460
+datachain/lib/feature.py,sha256=QDloA9HE7URf9J_veKrguYBvSg-0cbXZFTswNxrKsB8,12135
+datachain/lib/feature_registry.py,sha256=K3jGQzBp2HZDjR9hdGe1BZaXOAne8RpkCRRQdTVjkTs,1622
 datachain/lib/feature_utils.py,sha256=LIK233IWGWFhuav5Rm8de0xIOSnuwA1ubk6OYrxrfN0,4712
-datachain/lib/file.py,sha256=K0jH8Q5Xle2TiVDTCzmopku_7Lh-IVufV_mgtaCNHYI,8744
+datachain/lib/file.py,sha256=GQrqGgCEHICrUTdzTz_yhXqJWiae9EPTte1sd3hKeEU,8246
 datachain/lib/gpt4_vision.py,sha256=idyXVZVWzltstGaVIu5RYE5UNbdqcPEjIWy81O1MwkM,2922
 datachain/lib/hf_image_to_text.py,sha256=HiPSWzJRDT-vnz9DXJbJBNCMNl9wmpxiSS3PbbVz8SE,3310
 datachain/lib/hf_pipeline.py,sha256=f0AH_XCziOF1OKN3d1w1swTBLaeajMJ8xgdsX37i5-o,2287
@@ -58,7 +57,7 @@ datachain/lib/reader.py,sha256=rPXXNoTUdm6PQwkAlaU-nOBreP_q4ett_EjFStrA_W0,1727
 datachain/lib/settings.py,sha256=6Nkoh8riETrftYwDp3aniK53Dsjc07MdztL8N0cW1D8,2849
 datachain/lib/signal_schema.py,sha256=KaH194dAH8Zt8FtlNAgdVqcZlJc42y7RbcB37ldPPAY,11688
 datachain/lib/text.py,sha256=EEZrYohADi5rAGg3aLLRwtvyAV9js_yWAGhr2C3QbwI,2424
-datachain/lib/udf.py,sha256=kPc_6fQ4DzbiYiXvbps7QPlJWTu9MSCS8eUfGqOhjG4,6124
+datachain/lib/udf.py,sha256=D9TMxkAvj3zPRnZmkCxadEDtiG3B45t2xAEpuO14MOQ,5600
 datachain/lib/udf_signature.py,sha256=DAWMQ0dvFkKabpY5MV5K2q9YmOSTKfiV8KuUBs_6kMg,7258
 datachain/lib/unstructured.py,sha256=9Y6rAelXdYqkNbPaqz6DhXjhS8d6qXcP0ieIsWkzvkk,1143
 datachain/lib/utils.py,sha256=YQKzuW096SGe7QwHwdyS47k_9l2Rh73b-wBqt1-niw4,213
@@ -68,7 +67,7 @@ datachain/lib/webdataset_laion.py,sha256=HAtSCbVvEQqzKkoRamRxDKaQALSB3QmJRU2yWRF
 datachain/query/__init__.py,sha256=tv-spkjUCYamMN9ys_90scYrZ8kJ7C7d1MTYVmxGtk4,325
 datachain/query/batch.py,sha256=sOMxXbaNii7lVyFIEZ2noqbhy_S8qtZ-WWxrka72shc,3474
 datachain/query/builtins.py,sha256=ZKNs49t8Oa_OaboCBIEqtXZt7c1Qe9OR_C_HpoDriIU,2781
-datachain/query/dataset.py,sha256=c0ZoNEjAMmn0BdSnRm8XRWEsbaMH3xa_jd6FBJQDY1o,64576
+datachain/query/dataset.py,sha256=QYrtZApS8djybkuDfGO0tt8O6sCBlmkg9TE__R4eM-I,64475
 datachain/query/dispatch.py,sha256=fEk1qalxAb5JJhN-iq0Mg9MyWve4XoN1Q7uvrX4mJY4,13106
 datachain/query/metrics.py,sha256=vsECqbZfoSDBnvC3GQlziKXmISVYDLgHP1fMPEOtKyo,640
 datachain/query/params.py,sha256=O_j89mjYRLOwWNhYZl-z7mi-rkdP7WyFmaDufsdTryE,863
@@ -90,12 +89,12 @@ datachain/sql/functions/path.py,sha256=zixpERotTFP6LZ7I4TiGtyRA8kXOoZmH1yzH9oRW0
 datachain/sql/functions/random.py,sha256=vBwEEj98VH4LjWixUCygQ5Bz1mv1nohsCG0-ZTELlVg,271
 datachain/sql/functions/string.py,sha256=DsyY6ZMAUqmZVRSla-BJLsLYNsIgLOh4XLR3yvYJUbE,505
 datachain/sql/sqlite/__init__.py,sha256=TAdJX0Bg28XdqPO-QwUVKy8rg78cgMileHvMNot7d04,166
-datachain/sql/sqlite/base.py,sha256=XVxn4pB-N4pPfiby5uVvfH7feNzRKlBNzsc5eyKPvhI,10965
+datachain/sql/sqlite/base.py,sha256=nPMF6_FF04hclDNZev_YfxMgbJAsWEdF-rU2pUhqBtc,12048
 datachain/sql/sqlite/types.py,sha256=oP93nLfTBaYnN0z_4Dsv-HZm8j9rrUf1esMM-z3JLbg,1754
-datachain/sql/sqlite/vector.py,sha256=stBeEW6fbVbILmAtV4khjXdJIGT13HkRWJeCoqIOk50,315
-datachain-0.2.0.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
-datachain-0.2.0.dist-info/METADATA,sha256=iMX8hWEMXu-4MtXlD_SVwW3ija6bOLqSbeQvHoiMNfQ,14344
-datachain-0.2.0.dist-info/WHEEL,sha256=y4mX-SOX4fYIkonsAGA5N0Oy-8_gI4FXw5HNI1xqvWg,91
-datachain-0.2.0.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
-datachain-0.2.0.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
-datachain-0.2.0.dist-info/RECORD,,
+datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR0,469
+datachain-0.2.1.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
+datachain-0.2.1.dist-info/METADATA,sha256=kgX6auIOqU0DtW6dRyGWs1TrlGYLf1kN_By0XFW3t0Q,14346
+datachain-0.2.1.dist-info/WHEEL,sha256=y4mX-SOX4fYIkonsAGA5N0Oy-8_gI4FXw5HNI1xqvWg,91
+datachain-0.2.1.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
+datachain-0.2.1.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
+datachain-0.2.1.dist-info/RECORD,,
datachain/_version.py DELETED
@@ -1,16 +0,0 @@
-# file generated by setuptools_scm
-# don't change, don't track in version control
-TYPE_CHECKING = False
-if TYPE_CHECKING:
-    from typing import Tuple, Union
-    VERSION_TUPLE = Tuple[Union[int, str], ...]
-else:
-    VERSION_TUPLE = object
-
-version: str
-__version__: str
-__version_tuple__: VERSION_TUPLE
-version_tuple: VERSION_TUPLE
-
-__version__ = version = '0.2.0'
-__version_tuple__ = version_tuple = (0, 2, 0)
- __version_tuple__ = version_tuple = (0, 2, 0)