datachain 0.1.13__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- datachain/__init__.py +0 -4
- datachain/asyn.py +3 -3
- datachain/catalog/__init__.py +3 -3
- datachain/catalog/catalog.py +6 -6
- datachain/catalog/loader.py +3 -3
- datachain/cli.py +10 -2
- datachain/client/azure.py +37 -1
- datachain/client/fsspec.py +1 -1
- datachain/client/local.py +1 -1
- datachain/data_storage/__init__.py +1 -1
- datachain/data_storage/metastore.py +11 -3
- datachain/data_storage/schema.py +12 -7
- datachain/data_storage/sqlite.py +3 -0
- datachain/data_storage/warehouse.py +31 -30
- datachain/dataset.py +1 -3
- datachain/lib/arrow.py +85 -0
- datachain/lib/cached_stream.py +3 -85
- datachain/lib/dc.py +382 -179
- datachain/lib/feature.py +46 -91
- datachain/lib/feature_registry.py +4 -1
- datachain/lib/feature_utils.py +2 -2
- datachain/lib/file.py +30 -44
- datachain/lib/image.py +9 -2
- datachain/lib/meta_formats.py +66 -34
- datachain/lib/settings.py +5 -5
- datachain/lib/signal_schema.py +103 -105
- datachain/lib/udf.py +10 -38
- datachain/lib/udf_signature.py +11 -6
- datachain/lib/webdataset_laion.py +5 -22
- datachain/listing.py +8 -8
- datachain/node.py +1 -1
- datachain/progress.py +1 -1
- datachain/query/builtins.py +1 -1
- datachain/query/dataset.py +42 -119
- datachain/query/dispatch.py +1 -1
- datachain/query/metrics.py +19 -0
- datachain/query/schema.py +13 -3
- datachain/sql/__init__.py +1 -1
- datachain/sql/sqlite/base.py +34 -2
- datachain/sql/sqlite/vector.py +13 -5
- datachain/utils.py +1 -122
- {datachain-0.1.13.dist-info → datachain-0.2.1.dist-info}/METADATA +11 -4
- {datachain-0.1.13.dist-info → datachain-0.2.1.dist-info}/RECORD +47 -47
- {datachain-0.1.13.dist-info → datachain-0.2.1.dist-info}/WHEEL +1 -1
- datachain/_version.py +0 -16
- datachain/lib/parquet.py +0 -32
- {datachain-0.1.13.dist-info → datachain-0.2.1.dist-info}/LICENSE +0 -0
- {datachain-0.1.13.dist-info → datachain-0.2.1.dist-info}/entry_points.txt +0 -0
- {datachain-0.1.13.dist-info → datachain-0.2.1.dist-info}/top_level.txt +0 -0
datachain/query/dataset.py
CHANGED
@@ -56,13 +56,13 @@ from datachain.storage import Storage, StorageURI
 from datachain.utils import batched, determine_processes
 
 from .batch import RowBatch
+from .metrics import metrics
 from .schema import C, UDFParamSpec, normalize_param
 from .session import Session
 from .udf import UDFBase, UDFClassWrapper, UDFFactory, UDFType
 
 if TYPE_CHECKING:
     import pandas as pd
-    from pandas.core.interchange.dataframe_protocol import DataFrame as DataFrameXchg
     from sqlalchemy.sql.elements import ClauseElement
     from sqlalchemy.sql.schema import Table
     from sqlalchemy.sql.selectable import GenerativeSelect

@@ -71,7 +71,6 @@ if TYPE_CHECKING:
     from datachain.catalog import Catalog
     from datachain.data_storage import AbstractWarehouse
     from datachain.dataset import DatasetRecord
-    from datachain.sql.types import SQLType
 
     from .udf import UDFResult
 

@@ -197,7 +196,7 @@ class IndexingStep(StartingStep):
     def apply(self):
         self.catalog.index([self.path], **self.kwargs)
         uri, path = self.parse_path()
-
+        _partial_id, partial_path = self.catalog.metastore.get_valid_partial_id(
             uri, path
         )
         dataset = self.catalog.get_dataset(Storage.dataset_name(uri, partial_path))

@@ -523,30 +522,23 @@ class UDF(Step, ABC):
                 "cache": self.cache,
             }
 
-            feature_module_name, feature_file = self.process_feature_module()
-
-            # Write the module content to a .py file
-            with open(f"{feature_module_name}.py", "w") as module_file:
-                module_file.write(feature_file)
-
-            process_data = dumps(udf_info, recurse=True)
             # Run the UDFDispatcher in another process to avoid needing
             # if __name__ == '__main__': in user scripts
             datachain_exec_path = os.environ.get("DATACHAIN_EXEC_PATH", "datachain")
 
             envs = dict(os.environ)
             envs.update({"PYTHONPATH": os.getcwd()})
-
-
-
+            with self.process_feature_module():
+                process_data = dumps(udf_info, recurse=True)
+                result = subprocess.run(  # noqa: S603
+                    [datachain_exec_path, "--internal-run-udf"],
                     input=process_data,
                     check=False,
                     env=envs,
                 )
                 if result.returncode != 0:
                     raise RuntimeError("UDF Execution Failed!")
-
-            os.unlink(f"{feature_module_name}.py")
+
         else:
             # Otherwise process single-threaded (faster for smaller UDFs)
             # Optionally instantiate the UDF instance if a class is provided.

@@ -600,6 +592,7 @@ class UDF(Step, ABC):
                 self.catalog.warehouse.close()
                 raise
 
+    @contextlib.contextmanager
     def process_feature_module(self):
         # Generate a random name for the feature module
         feature_module_name = "tmp" + _random_string(10)

@@ -611,10 +604,14 @@ class UDF(Step, ABC):
             for name, obj in inspect.getmembers(sys.modules["__main__"], _imports)
             if not (name.startswith("__") and name.endswith("__"))
         ]
+        main_module = sys.modules["__main__"]
+
         # Get the feature classes from the main module
-        feature_classes =
-
-
+        feature_classes = {
+            name: obj
+            for name, obj in main_module.__dict__.items()
+            if _feature_predicate(obj)
+        }
         # Get the source code of the feature classes
         feature_sources = [source.getsource(cls) for _, cls in feature_classes.items()]
         # Set the module name for the feature classes to the generated name

@@ -626,7 +623,18 @@ class UDF(Step, ABC):
         # Combine the import lines and feature sources
         feature_file = "".join(import_lines) + "\n".join(feature_sources)
 
-
+        # Write the module content to a .py file
+        with open(f"{feature_module_name}.py", "w") as module_file:
+            module_file.write(feature_file)
+
+        try:
+            yield feature_module_name
+        finally:
+            for cls in feature_classes.values():
+                cls.__module__ = main_module.__name__
+            os.unlink(f"{feature_module_name}.py")
+            # Remove the dynamic module from sys.modules
+            del sys.modules[feature_module_name]
 
     def create_partitions_table(self, query: Select) -> "Table":
         """

@@ -685,8 +693,7 @@ class UDF(Step, ABC):
         )
 
         query, tables = self.process_input_query(query)
-        for t in tables:
-            temp_tables.append(t.name)
+        temp_tables.extend(t.name for t in tables)
         udf_table = self.create_udf_table(_query)
         temp_tables.append(udf_table.name)
         self.populate_udf_table(udf_table, query)

@@ -1120,6 +1127,12 @@ class DatasetQuery:
         indexing_feature_schema: Optional[dict] = None,
         indexing_column_types: Optional[dict[str, Any]] = None,
     ):
+        if client_config is None:
+            client_config = {}
+
+        if anon:
+            client_config["anon"] = True
+
         self.steps: list[Step] = []
         self.catalog = catalog or get_catalog(client_config=client_config)
         self._chunk_index: Optional[int] = None

@@ -1134,22 +1147,14 @@ class DatasetQuery:
         self.column_types: Optional[dict[str, Any]] = None
         self.session = Session.get(session, catalog=catalog)
 
-        if client_config is None:
-            client_config = {}
-
-        if anon:
-            client_config["anon"] = True
-
         if path:
-            self.starting_step = IndexingStep(
-                path, self.catalog, {"client_config": client_config}, recursive
-            )
+            self.starting_step = IndexingStep(path, self.catalog, {}, recursive)
             self.feature_schema = indexing_feature_schema
             self.column_types = indexing_column_types
         elif name:
             ds = self.catalog.get_dataset(name)
             self.version = version or ds.latest_version
-            self.feature_schema = ds.feature_schema
+            self.feature_schema = ds.get_version(self.version).feature_schema
             self.column_types = copy(ds.schema)
             if "id" in self.column_types:
                 self.column_types.pop("id")

@@ -1348,8 +1353,7 @@ class DatasetQuery:
             MapperCls = OrderedMapper if query._order_by_clauses else AsyncMapper  # noqa: N806
             with contextlib.closing(row_iter()) as rows:
                 mapper = MapperCls(get_params, rows, workers=workers)
-
-                yield params
+                yield from mapper.iterate()
         finally:
             self.cleanup()

@@ -1386,82 +1390,6 @@ class DatasetQuery:
         records = self.to_records()
         return pd.DataFrame.from_records(records)
 
-    @classmethod
-    def from_dataframe(
-        cls,
-        df: Union["DataFrameXchg", "pd.DataFrame"],
-        name: str = "",
-        version: Optional[int] = None,
-        catalog: Optional["Catalog"] = None,
-        session: Optional[Session] = None,
-    ) -> "Self":
-        from datachain.utils import dtype_mapper
-
-        catalog = catalog or get_catalog()
-        assert catalog is not None
-        session = Session.get(session, catalog=catalog)
-        assert session is not None
-
-        try:
-            if name and version and catalog.get_dataset(name).has_version(version):
-                raise RuntimeError(f"Dataset {name} already has version {version}")
-        except DatasetNotFoundError:
-            pass
-
-        if not name and version:
-            raise RuntimeError("Cannot set version for temporary datasets")
-
-        import pandas as pd  # noqa: F401
-        from pandas.api.interchange import from_dataframe
-
-        # This is not optimal for dataframes other than pd.DataFrame, as it may copy
-        # all the data to a new dataframe.
-        pd_df = from_dataframe(df)
-
-        dtype: dict[str, type[SQLType]] = {
-            str(pd_df.columns[i]): dtype_mapper(pd_df.iloc[:, i])
-            for i in range(len(pd_df.columns))
-        }
-
-        name = name or session.generate_temp_dataset_name()
-        dataset = catalog.create_dataset(
-            name,
-            version=version,
-            columns=[Column(name, typ) for name, typ in dtype.items()],
-        )
-        version = version or dataset.latest_version
-
-        dr = catalog.warehouse.dataset_rows(dataset)
-        pd_df.to_sql(
-            dr.table.name,
-            catalog.warehouse.db.engine,
-            if_exists="append",
-            index=False,
-            chunksize=10_000,
-            dtype=dtype,
-        )
-
-        catalog.metastore.update_dataset_status(
-            dataset, DatasetStatus.COMPLETE, version=version
-        )
-        catalog.update_dataset_version_with_warehouse_info(dataset, version)
-        return cls(name=name, version=version, catalog=catalog, session=session)
-
-    from_pandas = from_dataframe
-
-    @classmethod
-    def from_parquet(
-        cls,
-        uri: str,
-        *args,
-        **kwargs,
-    ) -> "Self":
-        import pandas as pd
-
-        pd_df = pd.read_parquet(uri, dtype_backend="pyarrow")
-
-        return cls.from_dataframe(pd_df, *args, **kwargs)
-
     def shuffle(self) -> "Self":
         # ToDo: implement shaffle based on seed and/or generating random column
         return self.order_by(C.random)

@@ -1809,22 +1737,16 @@ class DatasetQuery:
 
         # Exclude the id column and let the db create it to avoid unique
         # constraint violations.
-        cols = [col.name for col in dr.get_table().c if col.name != "id"]
-        assert cols
         q = query.exclude(("id",))
-
         if q._order_by_clauses:
             # ensuring we have id sorted by order by clause if it exists in a query
             q = q.add_columns(
                 f.row_number().over(order_by=q._order_by_clauses).label("id")
             )
-            cols.append("id")
-
-        self.catalog.warehouse.db.execute(
-            sqlalchemy.insert(dr.get_table()).from_select(cols, q),
-            **kwargs,
-        )
 
+        cols = tuple(c.name for c in q.columns)
+        insert_q = sqlalchemy.insert(dr.get_table()).from_select(cols, q)
+        self.catalog.warehouse.db.execute(insert_q, **kwargs)
         self.catalog.metastore.update_dataset_status(
             dataset, DatasetStatus.COMPLETE, version=version
         )

@@ -1853,6 +1775,7 @@ def _get_output_fd_for_write() -> Union[str, int]:
 class ExecutionResult:
     preview: list[dict] = attrs.field(factory=list)
     dataset: Optional[tuple[str, int]] = None
+    metrics: dict[str, Any] = attrs.field(factory=dict)
 
 
 def _send_result(dataset_query: DatasetQuery) -> None:

@@ -1886,7 +1809,7 @@ def _send_result(dataset_query: DatasetQuery) -> None:
     dataset = dataset_query.name, dataset_query.version
 
     preview = preview_query.to_records()
-    result = ExecutionResult(preview, dataset)
+    result = ExecutionResult(preview, dataset, metrics)
     data = attrs.asdict(result)
 
     with open(_get_output_fd_for_write(), mode="w") as f:
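Note: the main behavioural change above is that process_feature_module is now a contextlib.contextmanager, so the temporary tmp<random>.py module written for the parallel UDF run is always unlinked (and dropped from sys.modules) even when the subprocess fails. A minimal standalone sketch of that pattern, with illustrative names that are not datachain's actual API:

    import contextlib
    import os
    import sys
    import uuid


    @contextlib.contextmanager
    def temporary_module(source: str):
        """Write `source` to a throwaway .py file and guarantee cleanup afterwards."""
        name = "tmp" + uuid.uuid4().hex[:10]  # random module name, like datachain's tmp<random>
        with open(f"{name}.py", "w") as f:
            f.write(source)
        try:
            yield name
        finally:
            os.unlink(f"{name}.py")        # runs even if the body raised
            sys.modules.pop(name, None)    # drop it if something imported it


    with temporary_module("X = 42\n") as mod:
        print(f"{mod}.py is on disk while the UDF subprocess would run")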
datachain/query/dispatch.py
CHANGED
@@ -257,7 +257,7 @@ class UDFDispatcher:
 
         if self.buffer_size < n_workers:
             raise RuntimeError(
-
+                "Parallel run error: buffer size is smaller than "
                 f"number of workers: {self.buffer_size} < {n_workers}"
             )
 
datachain/query/metrics.py
ADDED

@@ -0,0 +1,19 @@
+from typing import Optional, Union
+
+metrics: dict[str, Union[str, int, float, bool, None]] = {}
+
+
+def set(key: str, value: Union[str, int, float, bool, None]) -> None:  # noqa: PYI041
+    """Set a metric value."""
+    if not isinstance(key, str):
+        raise TypeError("Key must be a string")
+    if not key:
+        raise ValueError("Key must not be empty")
+    if not isinstance(value, (str, int, float, bool, type(None))):
+        raise TypeError("Value must be a string, int, float or bool")
+    metrics[key] = value
+
+
+def get(key: str) -> Optional[Union[str, int, float, bool]]:
+    """Get a metric value."""
+    return metrics[key]
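Note: this module is new in 0.2.1, and dataset.py (above) now attaches its metrics dict to ExecutionResult when reporting results. A short usage sketch of the set/get helpers defined above; the query-script context and the metric names are hypothetical:

    from datachain.query import metrics

    # Inside a query script: record simple scalar metrics...
    metrics.set("files_processed", 1024)
    metrics.set("model", "resnet50")

    # ...and read them back; values are restricted to str, int, float, bool or None.
    assert metrics.get("files_processed") == 1024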
datachain/query/schema.py
CHANGED
@@ -18,20 +18,30 @@ if TYPE_CHECKING:
     from datachain.dataset import RowDict
 
 
+DEFAULT_DELIMITER = "__"
+
+
 class ColumnMeta(type):
+    @staticmethod
+    def to_db_name(name: str) -> str:
+        return name.replace(".", DEFAULT_DELIMITER)
+
     def __getattr__(cls, name: str):
-        return cls(name)
+        return cls(ColumnMeta.to_db_name(name))
 
 
 class Column(sa.ColumnClause, metaclass=ColumnMeta):
     inherit_cache: Optional[bool] = True
 
     def __init__(self, text, type_=None, is_literal=False, _selectable=None):
-        self.name = text
+        self.name = ColumnMeta.to_db_name(text)
         super().__init__(
-
+            self.name, type_=type_, is_literal=is_literal, _selectable=_selectable
         )
 
+    def __getattr__(self, name: str):
+        return Column(self.name + DEFAULT_DELIMITER + name)
+
     def glob(self, glob_str):
         return self.op("GLOB")(glob_str)
 
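Note: with DEFAULT_DELIMITER = "__", both dotted names and chained attribute access on Column are flattened into database column names. A small sketch, assuming C is the Column alias that dataset.py imports from this module:

    from datachain.query.schema import C, Column

    print(Column("laion.caption").name)  # "laion__caption"
    print(C.laion.caption.name)          # "laion__caption", via the nested __getattr__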
datachain/sql/__init__.py
CHANGED
datachain/sql/sqlite/base.py
CHANGED
@@ -71,8 +71,6 @@ def setup():
     compiles(sql_path.name, "sqlite")(compile_path_name)
     compiles(sql_path.file_stem, "sqlite")(compile_path_file_stem)
     compiles(sql_path.file_ext, "sqlite")(compile_path_file_ext)
-    compiles(array.cosine_distance, "sqlite")(compile_cosine_distance)
-    compiles(array.euclidean_distance, "sqlite")(compile_euclidean_distance)
     compiles(array.length, "sqlite")(compile_array_length)
     compiles(string.length, "sqlite")(compile_string_length)
     compiles(string.split, "sqlite")(compile_string_split)

@@ -81,6 +79,13 @@ def setup():
     compiles(Values, "sqlite")(compile_values)
     compiles(random.rand, "sqlite")(compile_rand)
 
+    if load_usearch_extension(sqlite3.connect(":memory:")):
+        compiles(array.cosine_distance, "sqlite")(compile_cosine_distance_ext)
+        compiles(array.euclidean_distance, "sqlite")(compile_euclidean_distance_ext)
+    else:
+        compiles(array.cosine_distance, "sqlite")(compile_cosine_distance)
+        compiles(array.euclidean_distance, "sqlite")(compile_euclidean_distance)
+
     register_user_defined_sql_functions()
     setup_is_complete = True
 

@@ -246,11 +251,23 @@ def compile_path_file_ext(element, compiler, **kwargs):
     return compiler.process(path_file_ext(*element.clauses.clauses), **kwargs)
 
 
+def compile_cosine_distance_ext(element, compiler, **kwargs):
+    run_compiler_hook("cosine_distance")
+    return f"distance_cosine_f32({compiler.process(element.clauses, **kwargs)})"
+
+
 def compile_cosine_distance(element, compiler, **kwargs):
     run_compiler_hook("cosine_distance")
     return f"cosine_distance({compiler.process(element.clauses, **kwargs)})"
 
 
+def compile_euclidean_distance_ext(element, compiler, **kwargs):
+    run_compiler_hook("euclidean_distance")
+    return (
+        f"sqrt(distance_sqeuclidean_f32({compiler.process(element.clauses, **kwargs)}))"
+    )
+
+
 def compile_euclidean_distance(element, compiler, **kwargs):
     run_compiler_hook("euclidean_distance")
     return f"euclidean_distance({compiler.process(element.clauses, **kwargs)})"

@@ -330,3 +347,18 @@ def compile_values(element, compiler, **kwargs):
 
 def compile_rand(element, compiler, **kwargs):
     return compiler.process(func.random(), **kwargs)
+
+
+def load_usearch_extension(conn) -> bool:
+    try:
+        # usearch is part of the vector optional dependencies
+        # we use the extension's cosine and euclidean distance functions
+        from usearch import sqlite_path
+
+        conn.enable_load_extension(True)
+        conn.load_extension(sqlite_path())
+        conn.enable_load_extension(False)
+        return True
+
+    except Exception:  # noqa: BLE001
+        return False
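Note: setup() now probes a throwaway in-memory connection and, when the usearch SQLite extension loads, compiles cosine/euclidean distance to the extension's distance_cosine_f32 / distance_sqeuclidean_f32 functions instead of the pure-Python fallbacks. A hedged standalone sketch of the same probe; the SELECT assumes the extension accepts the "[...]"-style vector strings that the fallback UDFs in vector.py also parse:

    import sqlite3


    def load_usearch_extension(conn) -> bool:
        # Same probe as in the diff: try to load usearch's bundled SQLite extension.
        try:
            from usearch import sqlite_path  # optional "vector" extra

            conn.enable_load_extension(True)
            conn.load_extension(sqlite_path())
            conn.enable_load_extension(False)
            return True
        except Exception:
            return False


    conn = sqlite3.connect(":memory:")
    if load_usearch_extension(conn):
        # roughly what the compiled SQL looks like when the extension is available
        print(conn.execute("SELECT distance_cosine_f32('[1,0]', '[0,1]')").fetchone())
    else:
        print("usearch unavailable; the NumPy fallbacks from vector.py are registered")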
datachain/sql/sqlite/vector.py
CHANGED
@@ -1,15 +1,23 @@
-import
+import math
 
 import numpy as np
-from scipy.spatial import distance
 
 
 def euclidean_distance(a: str, b: str):
-    a_np = np.
-    b_np = np.
+    a_np = np.fromstring(a[1:-1], sep=",")
+    b_np = np.fromstring(b[1:-1], sep=",")
 
     return np.linalg.norm(b_np - a_np)
 
 
 def cosine_distance(a: str, b: str):
-
+    u = np.fromstring(a[1:-1], sep=",")
+    v = np.fromstring(b[1:-1], sep=",")
+
+    uv = np.inner(u, v)
+    uu = np.inner(u, u)
+    vv = np.inner(v, v)
+
+    dist = 1.0 - uv / math.sqrt(uu * vv)
+
+    return max(0, min(dist, 2.0))
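Note: the rewritten fallbacks drop the scipy dependency; cosine_distance is now 1 - inner(u, v) / sqrt(inner(u, u) * inner(v, v)), clamped to [0, 2], computed with plain NumPy on the "[...]"-serialized vectors. A quick worked check of that formula, re-implemented here purely for illustration:

    import math

    import numpy as np


    def cosine_distance(a: str, b: str):
        # Parse "[x, y, ...]" strings, then 1 - cosine similarity, clamped to [0, 2].
        u = np.fromstring(a[1:-1], sep=",")
        v = np.fromstring(b[1:-1], sep=",")
        dist = 1.0 - np.inner(u, v) / math.sqrt(np.inner(u, u) * np.inner(v, v))
        return max(0, min(dist, 2.0))


    print(cosine_distance("[1, 0, 0]", "[0, 1, 0]"))  # orthogonal vectors -> 1.0
    print(cosine_distance("[1, 2, 3]", "[1, 2, 3]"))  # identical vectors  -> ~0.0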
datachain/utils.py
CHANGED
@@ -18,9 +18,6 @@ from dateutil.parser import isoparse
 
 if TYPE_CHECKING:
     import pandas as pd
-    import pyarrow as pa
-
-    from datachain.sql.types import SQLType
 
 NUL = b"\0"
 TIME_ZERO = datetime.fromtimestamp(0, tz=timezone.utc)

@@ -78,7 +75,7 @@ class DataChainDir:
         if create:
             instance.init()
         else:
-            NotADirectoryError(root)
+            raise NotADirectoryError(root)
         return instance
 
 

@@ -363,121 +360,3 @@ class JSONSerialize(json.JSONEncoder):
             return str(obj)
 
         return super().default(obj)
-
-
-def dtype_mapper(col: Union["pd.Index", "pd.Series"]) -> type["SQLType"]:  # noqa: PLR0911
-    from pandas import ArrowDtype
-    from pandas.api.types import infer_dtype
-
-    from datachain.sql.types import (
-        Binary,
-        Boolean,
-        DateTime,
-        Float,
-        Float32,
-        Float64,
-        Int,
-        Int32,
-        Int64,
-        String,
-        UInt64,
-    )
-
-    if isinstance(col.dtype, ArrowDtype):
-        return arrow_type_mapper(col.dtype.pyarrow_dtype)
-
-    col_type = infer_dtype(col, skipna=True)
-
-    if col_type in ("datetime", "datetime64"):
-        return DateTime
-    if col_type == "bytes":
-        return Binary
-    if col_type == "floating":
-        if col.dtype == "float32":
-            return Float32
-        if col.dtype == "float64":
-            return Float64
-        return Float
-    if col_type == "integer":
-        if col.dtype.name.lower() in ("int8", "int16", "int32"):
-            return Int32
-        if col.dtype.name.lower() == "int64":
-            return Int64
-        if col.dtype.name.lower().startswith("uint"):
-            return UInt64
-        return Int
-    if col_type == "boolean":
-        return Boolean
-    if col_type == "date":
-        return DateTime
-    if col_type in (
-        "complex",
-        "time",
-        "timedelta",
-        "timedelta64",
-        "period",
-        "interval",
-    ):
-        raise ValueError(f"{col_type!r} datatypes not supported")
-    return String
-
-
-def arrow_type_mapper(col_type: "pa.DataType") -> type["SQLType"]:  # noqa: PLR0911,C901
-    try:
-        import pyarrow as pa
-    except ImportError as exc:
-        raise ImportError(
-            "Missing required dependency pyarrow for inferring types"
-        ) from exc
-
-    from datachain.sql.types import (
-        JSON,
-        Array,
-        Binary,
-        Boolean,
-        DateTime,
-        Float,
-        Float32,
-        Float64,
-        Int,
-        Int32,
-        Int64,
-        String,
-        UInt64,
-    )
-
-    if pa.types.is_timestamp(col_type):
-        return DateTime
-    if pa.types.is_binary(col_type):
-        return Binary
-    if pa.types.is_floating(col_type):
-        if pa.types.is_float32(col_type):
-            return Float32
-        if pa.types.is_float64(col_type):
-            return Float64
-        return Float
-    if pa.types.is_integer(col_type):
-        if (
-            pa.types.is_int8(col_type)
-            or pa.types.is_int16(col_type)
-            or pa.types.is_int32(col_type)
-        ):
-            return Int32
-        if pa.types.is_int64(col_type):
-            return Int64
-        if pa.types.is_unsigned_integer(col_type):
-            return UInt64
-        return Int
-    if pa.types.is_boolean(col_type):
-        return Boolean
-    if pa.types.is_date(col_type):
-        return DateTime
-    if pa.types.is_string(col_type):
-        return String
-    if pa.types.is_list(col_type):
-        return Array(arrow_type_mapper(col_type.value_type))  # type: ignore[return-value]
-    if pa.types.is_struct(col_type) or pa.types.is_map(col_type):
-        return JSON
-    if isinstance(col_type, pa.lib.DictionaryType):
-        return arrow_type_mapper(col_type.value_type)  # type: ignore[return-value]
-    raise ValueError(f"{col_type!r} datatypes not supported")
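Note: besides moving the dtype_mapper/arrow_type_mapper helpers out of utils.py (presumably the new datachain/lib/arrow.py in the file list takes over that role), the small but real fix here is raise NotADirectoryError(root): the old code built the exception object without raising it. A tiny sketch of the difference, with hypothetical helper names:

    def check_dir_old(root: str) -> None:
        # 0.1.13 behaviour: creates the exception object, then discards it (a no-op)
        NotADirectoryError(root)


    def check_dir_new(root: str) -> None:
        # 0.2.1 behaviour: the error actually reaches the caller
        raise NotADirectoryError(root)


    check_dir_old("/missing/dir")     # silently returns
    # check_dir_new("/missing/dir")   # would raise NotADirectoryError: /missing/dir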
{datachain-0.1.13.dist-info → datachain-0.2.1.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: datachain
-Version: 0.1.13
+Version: 0.2.1
 Summary: Wrangle unstructured AI data at scale
 Author-email: Dmitry Petrov <support@dvc.org>
 License: Apache-2.0

@@ -44,12 +44,19 @@ Requires-Dist: torch >=2.1.0 ; extra == 'cv'
 Requires-Dist: torchvision ; extra == 'cv'
 Requires-Dist: transformers >=4.36.0 ; extra == 'cv'
 Provides-Extra: dev
-Requires-Dist: datachain[tests] ; extra == 'dev'
-Requires-Dist: mypy ==1.10.
+Requires-Dist: datachain[docs,tests] ; extra == 'dev'
+Requires-Dist: mypy ==1.10.1 ; extra == 'dev'
 Requires-Dist: types-python-dateutil ; extra == 'dev'
 Requires-Dist: types-PyYAML ; extra == 'dev'
 Requires-Dist: types-requests ; extra == 'dev'
 Requires-Dist: types-ujson ; extra == 'dev'
+Provides-Extra: docs
+Requires-Dist: mkdocs >=1.5.2 ; extra == 'docs'
+Requires-Dist: mkdocs-gen-files >=0.5.0 ; extra == 'docs'
+Requires-Dist: mkdocs-material >=9.3.1 ; extra == 'docs'
+Requires-Dist: mkdocs-section-index >=0.3.6 ; extra == 'docs'
+Requires-Dist: mkdocstrings-python >=1.6.3 ; extra == 'docs'
+Requires-Dist: mkdocs-literate-nav >=0.6.1 ; extra == 'docs'
 Provides-Extra: remote
 Requires-Dist: datachain[pandas] ; extra == 'remote'
 Requires-Dist: lz4 ; extra == 'remote'

@@ -72,7 +79,7 @@ Requires-Dist: open-clip-torch ; extra == 'tests'
 Requires-Dist: aiotools >=1.7.0 ; extra == 'tests'
 Requires-Dist: requests-mock ; extra == 'tests'
 Provides-Extra: vector
-Requires-Dist:
+Requires-Dist: usearch ; extra == 'vector'
 
 |PyPI| |Python Version| |Codecov| |Tests| |License|
 