datachain 0.14.2__py3-none-any.whl → 0.39.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datachain/__init__.py +20 -0
- datachain/asyn.py +11 -12
- datachain/cache.py +7 -7
- datachain/catalog/__init__.py +2 -2
- datachain/catalog/catalog.py +621 -507
- datachain/catalog/dependency.py +164 -0
- datachain/catalog/loader.py +28 -18
- datachain/checkpoint.py +43 -0
- datachain/cli/__init__.py +24 -33
- datachain/cli/commands/__init__.py +1 -8
- datachain/cli/commands/datasets.py +83 -52
- datachain/cli/commands/ls.py +17 -17
- datachain/cli/commands/show.py +4 -4
- datachain/cli/parser/__init__.py +8 -74
- datachain/cli/parser/job.py +95 -3
- datachain/cli/parser/studio.py +11 -4
- datachain/cli/parser/utils.py +1 -2
- datachain/cli/utils.py +2 -15
- datachain/client/azure.py +4 -4
- datachain/client/fsspec.py +45 -28
- datachain/client/gcs.py +6 -6
- datachain/client/hf.py +29 -2
- datachain/client/http.py +157 -0
- datachain/client/local.py +15 -11
- datachain/client/s3.py +17 -9
- datachain/config.py +4 -8
- datachain/data_storage/db_engine.py +12 -6
- datachain/data_storage/job.py +5 -1
- datachain/data_storage/metastore.py +1252 -186
- datachain/data_storage/schema.py +58 -45
- datachain/data_storage/serializer.py +105 -15
- datachain/data_storage/sqlite.py +286 -127
- datachain/data_storage/warehouse.py +250 -113
- datachain/dataset.py +353 -148
- datachain/delta.py +391 -0
- datachain/diff/__init__.py +27 -29
- datachain/error.py +60 -0
- datachain/func/__init__.py +2 -1
- datachain/func/aggregate.py +66 -42
- datachain/func/array.py +242 -38
- datachain/func/base.py +7 -4
- datachain/func/conditional.py +110 -60
- datachain/func/func.py +96 -45
- datachain/func/numeric.py +55 -38
- datachain/func/path.py +32 -20
- datachain/func/random.py +2 -2
- datachain/func/string.py +67 -37
- datachain/func/window.py +7 -8
- datachain/hash_utils.py +123 -0
- datachain/job.py +11 -7
- datachain/json.py +138 -0
- datachain/lib/arrow.py +58 -22
- datachain/lib/audio.py +245 -0
- datachain/lib/clip.py +14 -13
- datachain/lib/convert/flatten.py +5 -3
- datachain/lib/convert/python_to_sql.py +6 -10
- datachain/lib/convert/sql_to_python.py +8 -0
- datachain/lib/convert/values_to_tuples.py +156 -51
- datachain/lib/data_model.py +42 -20
- datachain/lib/dataset_info.py +36 -8
- datachain/lib/dc/__init__.py +8 -2
- datachain/lib/dc/csv.py +25 -28
- datachain/lib/dc/database.py +398 -0
- datachain/lib/dc/datachain.py +1289 -425
- datachain/lib/dc/datasets.py +320 -38
- datachain/lib/dc/hf.py +38 -24
- datachain/lib/dc/json.py +29 -32
- datachain/lib/dc/listings.py +112 -8
- datachain/lib/dc/pandas.py +16 -12
- datachain/lib/dc/parquet.py +35 -23
- datachain/lib/dc/records.py +31 -23
- datachain/lib/dc/storage.py +154 -64
- datachain/lib/dc/storage_pattern.py +251 -0
- datachain/lib/dc/utils.py +24 -16
- datachain/lib/dc/values.py +8 -9
- datachain/lib/file.py +622 -89
- datachain/lib/hf.py +69 -39
- datachain/lib/image.py +14 -14
- datachain/lib/listing.py +14 -11
- datachain/lib/listing_info.py +1 -2
- datachain/lib/meta_formats.py +3 -4
- datachain/lib/model_store.py +39 -7
- datachain/lib/namespaces.py +125 -0
- datachain/lib/projects.py +130 -0
- datachain/lib/pytorch.py +32 -21
- datachain/lib/settings.py +192 -56
- datachain/lib/signal_schema.py +427 -104
- datachain/lib/tar.py +1 -2
- datachain/lib/text.py +8 -7
- datachain/lib/udf.py +164 -76
- datachain/lib/udf_signature.py +60 -35
- datachain/lib/utils.py +118 -4
- datachain/lib/video.py +17 -9
- datachain/lib/webdataset.py +61 -56
- datachain/lib/webdataset_laion.py +15 -16
- datachain/listing.py +22 -10
- datachain/model/bbox.py +3 -1
- datachain/model/ultralytics/bbox.py +16 -12
- datachain/model/ultralytics/pose.py +16 -12
- datachain/model/ultralytics/segment.py +16 -12
- datachain/namespace.py +84 -0
- datachain/node.py +6 -6
- datachain/nodes_thread_pool.py +0 -1
- datachain/plugins.py +24 -0
- datachain/project.py +78 -0
- datachain/query/batch.py +40 -41
- datachain/query/dataset.py +604 -322
- datachain/query/dispatch.py +261 -154
- datachain/query/metrics.py +4 -6
- datachain/query/params.py +2 -3
- datachain/query/queue.py +3 -12
- datachain/query/schema.py +11 -6
- datachain/query/session.py +200 -33
- datachain/query/udf.py +34 -2
- datachain/remote/studio.py +171 -69
- datachain/script_meta.py +12 -12
- datachain/semver.py +68 -0
- datachain/sql/__init__.py +2 -0
- datachain/sql/functions/array.py +33 -1
- datachain/sql/postgresql_dialect.py +9 -0
- datachain/sql/postgresql_types.py +21 -0
- datachain/sql/sqlite/__init__.py +5 -1
- datachain/sql/sqlite/base.py +102 -29
- datachain/sql/sqlite/types.py +8 -13
- datachain/sql/types.py +70 -15
- datachain/studio.py +223 -46
- datachain/toolkit/split.py +31 -10
- datachain/utils.py +101 -59
- {datachain-0.14.2.dist-info → datachain-0.39.0.dist-info}/METADATA +77 -22
- datachain-0.39.0.dist-info/RECORD +173 -0
- {datachain-0.14.2.dist-info → datachain-0.39.0.dist-info}/WHEEL +1 -1
- datachain/cli/commands/query.py +0 -53
- datachain/query/utils.py +0 -42
- datachain-0.14.2.dist-info/RECORD +0 -158
- {datachain-0.14.2.dist-info → datachain-0.39.0.dist-info}/entry_points.txt +0 -0
- {datachain-0.14.2.dist-info → datachain-0.39.0.dist-info}/licenses/LICENSE +0 -0
- {datachain-0.14.2.dist-info → datachain-0.39.0.dist-info}/top_level.txt +0 -0
datachain/client/gcs.py
CHANGED

@@ -3,7 +3,7 @@ import json
 import os
 from collections.abc import Iterable
 from datetime import datetime
-from typing import Any, Optional, cast
+from typing import Any, cast

 from dateutil.parser import isoparse
 from gcsfs import GCSFileSystem
@@ -15,7 +15,7 @@ from .fsspec import DELIMITER, Client, ResultQueue

 # Patch gcsfs for consistency with s3fs
 GCSFileSystem.set_session = GCSFileSystem._set_session
-PageQueue = asyncio.Queue[Optional[Iterable[dict[str, Any]]]]
+PageQueue = asyncio.Queue[Iterable[dict[str, Any]] | None]


 class GCSClient(Client):
@@ -74,7 +74,7 @@ class GCSClient(Client):
         try:
             await self._get_pages(prefix, page_queue)
             found = await consumer
-            if not found:
+            if not found and prefix:
                 raise FileNotFoundError(f"Unable to resolve remote path: {prefix}")
         finally:
             consumer.cancel()  # In case _get_pages() raised
@@ -115,7 +115,7 @@ class GCSClient(Client):
                 maxResults=page_size,
                 pageToken=next_page_token,
                 json_out=True,
-                versions="true",
+                versions="true" if self._is_version_aware() else "false",
             )
             assert page["kind"] == "storage#objects"
             await page_queue.put(page.get("items", []))
@@ -134,12 +134,12 @@ class GCSClient(Client):
             source=self.uri,
             path=path,
             etag=v.get("etag", ""),
-            version=v.get("generation", ""),
+            version=v.get("generation", "") if self._is_version_aware() else "",
             is_latest=not v.get("timeDeleted"),
             last_modified=self.parse_timestamp(v["updated"]),
             size=v.get("size", ""),
         )

     @classmethod
-    def version_path(cls, path: str, version_id: Optional[str]) -> str:
+    def version_path(cls, path: str, version_id: str | None) -> str:
         return f"{path}#{version_id}" if version_id else path
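
For readers skimming the hunks above: `version_path` encodes a GCS object generation by appending it after a `#`, the same convention gcsfs uses. A standalone sketch of that contract (function body copied from the diff; the example values are hypothetical):

    def version_path(path: str, version_id: str | None) -> str:
        return f"{path}#{version_id}" if version_id else path

    assert version_path("bucket/data.csv", "1712345678901234") == "bucket/data.csv#1712345678901234"
    assert version_path("bucket/data.csv", None) == "bucket/data.csv"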
datachain/client/hf.py
CHANGED

@@ -15,6 +15,34 @@ class classproperty:  # noqa: N801
         return self.fget(owner)


+def _wrap_class(sync_fs_class):
+    """
+    Analog of `AsyncFileSystemWrapper.wrap_class` from fsspec, but sets
+    `asynchronous` to False by default. This is similar to the other async
+    filesystems we initialize; e.g. it means we don't break things in
+    Jupyter, where code runs in an async loop.
+
+    This also fixes write operations by ensuring they are properly forwarded
+    to the underlying filesystem without async buffering issues.
+    """
+    from fsspec.implementations.asyn_wrapper import AsyncFileSystemWrapper
+
+    class GeneratedAsyncFileSystemWrapper(AsyncFileSystemWrapper):
+        def __init__(self, *args, **kwargs):
+            sync_fs = sync_fs_class(*args, **kwargs)
+            super().__init__(sync_fs, asynchronous=False)
+
+        def open(self, path, mode="rb", **kwargs):
+            # Override open to ensure write operations work correctly.
+            # It seems to be a bug in the fsspec wrapper: it avoids
+            # wrapping open() explicitly but also doesn't redirect it to
+            # the sync filesystem.
+            return self.sync_fs.open(path, mode, **kwargs)
+
+    GeneratedAsyncFileSystemWrapper.__name__ = f"Async{sync_fs_class.__name__}Wrapper"
+    return GeneratedAsyncFileSystemWrapper
+
+
 @functools.cache
 def get_hf_filesystem_cls():
     import fsspec
@@ -29,10 +57,9 @@ def get_hf_filesystem_cls():
         f"{fsspec_version} is installed."
     )

-    from fsspec.implementations.asyn_wrapper import AsyncFileSystemWrapper
     from huggingface_hub import HfFileSystem

-    fs_cls = AsyncFileSystemWrapper.wrap_class(HfFileSystem)
+    fs_cls = _wrap_class(HfFileSystem)
     # AsyncFileSystemWrapper does not set class properties, so we need to set them back.
     fs_cls.protocol = HfFileSystem.protocol
     return fs_cls
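
The wrapper above is generic: any synchronous fsspec filesystem class can be passed through `_wrap_class`. A hedged usage sketch, assuming the `_wrap_class` helper above is in scope and fsspec is recent enough to ship `AsyncFileSystemWrapper` (`MemoryFileSystem` stands in for `HfFileSystem` here):

    from fsspec.implementations.memory import MemoryFileSystem

    AsyncMemoryFS = _wrap_class(MemoryFileSystem)
    fs = AsyncMemoryFS()  # asynchronous=False by default, so it is safe in Jupyter
    with fs.open("/demo.txt", "wb") as f:  # open() is forwarded to the sync FS
        f.write(b"hello")
    assert fs.cat_file("/demo.txt") == b"hello"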
datachain/client/http.py
ADDED

@@ -0,0 +1,157 @@
+from datetime import datetime, timezone
+from typing import TYPE_CHECKING, Any, ClassVar, cast
+from urllib.parse import urlparse
+
+from fsspec.implementations.http import HTTPFileSystem
+
+from datachain.dataset import StorageURI
+from datachain.lib.file import File
+
+from .fsspec import Client
+
+if TYPE_CHECKING:
+    from datachain.cache import Cache
+
+
+class HTTPClient(Client):
+    FS_CLASS = HTTPFileSystem
+    PREFIX: ClassVar[str] = "http://"
+    protocol: ClassVar[str] = "http"
+
+    @classmethod
+    def create_fs(cls, **kwargs) -> HTTPFileSystem:
+        # Configure HTTPFileSystem options
+        kwargs.setdefault("simple_links", True)
+        kwargs.setdefault("same_scheme", True)
+        kwargs.setdefault("cache_type", "bytes")
+
+        kwargs.pop("version_aware", None)
+
+        fs = cls.FS_CLASS(**kwargs)
+        fs.invalidate_cache()
+        return cast("HTTPFileSystem", fs)
+
+    @classmethod
+    def from_name(
+        cls,
+        name: str,
+        cache: "Cache",
+        kwargs: dict[str, Any],
+    ) -> "HTTPClient":
+        parsed = urlparse(name)
+
+        if parsed.scheme:
+            name = parsed.netloc + parsed.path
+
+        return cls(name, kwargs, cache)
+
+    @classmethod
+    def split_url(cls, url: str) -> tuple[str, str]:
+        """Split HTTP/HTTPS URL into domain (bucket equivalent) and path."""
+        parsed = urlparse(url)
+        domain = parsed.netloc
+        path = parsed.path.lstrip("/")
+
+        if parsed.query:
+            path += f"?{parsed.query}"
+        if parsed.fragment:
+            path += f"#{parsed.fragment}"
+
+        return domain, path
+
+    @classmethod
+    def get_uri(cls, name: str) -> "StorageURI":
+        if not name.startswith(("http://", "https://")):
+            return StorageURI(f"{cls.PREFIX}{name}")
+        return StorageURI(name)
+
+    @classmethod
+    def is_root_url(cls, url: str) -> bool:
+        parsed = urlparse(url)
+        return parsed.path in ("", "/") and not parsed.query and not parsed.fragment
+
+    def get_full_path(self, rel_path: str, version_id: str | None = None) -> str:
+        if self.name.startswith(("http://", "https://")):
+            base_url = self.name
+        else:
+            if rel_path and "/" in rel_path:
+                first_part = rel_path.split("/")[0]
+                if "." in first_part and not first_part.startswith("."):
+                    return f"{self.protocol}://{rel_path}"
+
+            base_url = f"{self.protocol}://{self.name}"
+
+        if rel_path:
+            if not base_url.endswith("/") and not rel_path.startswith("/"):
+                base_url += "/"
+            full_url = base_url + rel_path
+        else:
+            full_url = base_url
+
+        return full_url
+
+    def url(self, path: str, expires: int = 3600, **kwargs) -> str:
+        """
+        Generate URL for the given path.
+        Note: HTTP URLs don't support signed/expiring URLs.
+        """
+        return self.get_full_path(path, kwargs.pop("version_id", None))
+
+    def info_to_file(self, v: dict[str, Any], path: str) -> File:
+        etag = v.get("ETag", "").strip('"')
+        last_modified = v.get("last_modified")
+        if last_modified:
+            if isinstance(last_modified, str):
+                try:
+                    from email.utils import parsedate_to_datetime
+
+                    last_modified = parsedate_to_datetime(last_modified)
+                except (ValueError, TypeError):
+                    last_modified = datetime.now(timezone.utc)
+            elif isinstance(last_modified, (int, float)):
+                last_modified = datetime.fromtimestamp(last_modified, timezone.utc)
+        else:
+            last_modified = datetime.now(timezone.utc)
+
+        return File(
+            source=self.uri,
+            path=path,
+            size=v.get("size", 0),
+            etag=etag,
+            version="",
+            is_latest=True,
+            last_modified=last_modified,
+        )
+
+    def upload(self, data: bytes, path: str) -> "File":
+        raise NotImplementedError(
+            "HTTP/HTTPS client is read-only. Upload operations are not supported."
+        )
+
+    def get_file_info(self, path: str, version_id: str | None = None) -> "File":
+        info = self.fs.info(self.get_full_path(path))
+        return self.info_to_file(info, path)
+
+    def open_object(self, file: "File", use_cache: bool = True, cb=None):
+        from datachain.client.fileslice import FileWrapper
+
+        if use_cache and (cache_path := self.cache.get_path(file)):
+            return open(cache_path, mode="rb")
+
+        assert not file.location
+        return FileWrapper(
+            self.fs.open(self.get_full_path(file.get_path_normalized())),
+            cb or (lambda x: None),
+        )
+
+    async def get_file(self, lpath, rpath, callback, version_id: str | None = None):
+        return await self.fs._get_file(lpath, rpath, callback=callback)
+
+    async def _fetch_dir(self, prefix: str, pbar, result_queue) -> set[str]:
+        full_url = self.get_full_path(prefix)
+        raise NotImplementedError(f"Cannot download file from {full_url}")
+
+
+class HTTPSClient(HTTPClient):
+    protocol = "https"
+    PREFIX = "https://"
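
Because the new client treats the URL's host as the bucket analogue, `split_url` folds query and fragment back into the path. A self-contained sketch of that behavior (the example URL is hypothetical):

    from urllib.parse import urlparse

    def split_url(url: str) -> tuple[str, str]:
        parsed = urlparse(url)
        path = parsed.path.lstrip("/")
        if parsed.query:
            path += f"?{parsed.query}"
        if parsed.fragment:
            path += f"#{parsed.fragment}"
        return parsed.netloc, path

    assert split_url("https://example.com/data/train.csv?v=2") == ("example.com", "data/train.csv?v=2")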
datachain/client/local.py
CHANGED

@@ -2,14 +2,14 @@ import os
 import posixpath
 from datetime import datetime, timezone
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Optional
+from typing import TYPE_CHECKING, Any
 from urllib.parse import urlparse

 from fsspec.implementations.local import LocalFileSystem

 from datachain.lib.file import File

-from .fsspec import Client
+from .fsspec import Client, is_win_local_path

 if TYPE_CHECKING:
     from datachain.cache import Cache
@@ -57,9 +57,13 @@ class FileClient(Client):
         /home/user/animals/ -> file:///home/user/animals/
         C:\\windows\animals -> file:///C:/windows/animals
         """
+        parsed = urlparse(path)
+        if parsed.scheme and not is_win_local_path(path):
+            return path
+
         uri = Path(path).expanduser().absolute().resolve().as_uri()
-        if path[-1] == os.sep:
-            #
+        if path and path[-1] in (os.sep, "/"):
+            # keep trailing separator so directory URIs stay rooted
             uri += "/"  # in uri (file:///...) all separators are / regardless of os

         return uri
@@ -99,13 +103,13 @@ class FileClient(Client):
         )

     async def get_current_etag(self, file: "File") -> str:
-        info = self.fs.info(self.get_full_path(file.path))
+        info = self.fs.info(self.get_full_path(file.get_path_normalized()))
         return self.info_to_file(info, "").etag

-    async def get_size(self, path: str, version_id: Optional[str] = None) -> int:
+    async def get_size(self, path: str, version_id: str | None = None) -> int:
         return self.fs.size(path)

-    async def get_file(self, lpath, rpath, callback, version_id: Optional[str] = None):
+    async def get_file(self, lpath, rpath, callback, version_id: str | None = None):
         return self.fs.get_file(lpath, rpath, callback=callback)

     async def ls_dir(self, path):
@@ -114,7 +118,7 @@ class FileClient(Client):
     def rel_path(self, path):
         return posixpath.relpath(path, self.name)

-    def get_full_path(self, rel_path, version_id: Optional[str] = None):
+    def get_full_path(self, rel_path, version_id: str | None = None):
         full_path = Path(self.name, rel_path).as_posix()
         if rel_path.endswith("/") or not rel_path:
             full_path += "/"
@@ -138,8 +142,8 @@ class FileClient(Client):
         if not self.use_symlinks:
             super().fetch_nodes(nodes, shared_progress_bar)

-    def do_instantiate_object(self, uid, dst: str) -> None:
+    def do_instantiate_object(self, file: File, dst: str) -> None:
         if self.use_symlinks:
-            os.symlink(Path(self.name, uid.path), dst)
+            os.symlink(Path(self.name, file.path), dst)
         else:
-            super().do_instantiate_object(uid, dst)
+            super().do_instantiate_object(file, dst)
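
The new `is_win_local_path` guard exists because `urlparse` happily reads a Windows drive letter as a URL scheme, so a plain "has a scheme, must already be a URI" check would pass `C:\...` paths through unconverted. A quick illustration:

    from urllib.parse import urlparse

    print(urlparse("file:///home/user/animals").scheme)  # "file" -- a real URI
    print(urlparse(r"C:\windows\animals").scheme)        # "c"    -- just a drive letter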
datachain/client/s3.py
CHANGED

@@ -1,6 +1,6 @@
 import asyncio
 import os
-from typing import Any, Optional, cast
+from typing import Any, cast
 from urllib.parse import parse_qs, urlsplit, urlunsplit

 from botocore.exceptions import NoCredentialsError
@@ -80,7 +80,7 @@ class ClientS3(Client):
             finally:
                 await page_queue.put(None)

-        async def process_pages(page_queue, result_queue):
+        async def process_pages(page_queue, result_queue, prefix):
            found = False
            with tqdm(desc=f"Listing {self.uri}", unit=" objects", leave=False) as pbar:
                while (res := await page_queue.get()) is not None:
@@ -94,14 +94,14 @@ class ClientS3(Client):
                     if entries:
                         await result_queue.put(entries)
                         pbar.update(len(entries))
-            if not found:
+            if not found and prefix:
                 raise FileNotFoundError(f"Unable to resolve remote path: {prefix}")

         try:
             prefix = start_prefix
             if prefix:
                 prefix = prefix.lstrip(DELIMITER) + DELIMITER
-            versions =
+            versions = self._is_version_aware()
             fs = self.fs
             await fs.set_session()
             s3 = await fs.get_s3(self.name)
@@ -118,7 +118,9 @@ class ClientS3(Client):
                 Delimiter="",
             )
             page_queue: asyncio.Queue[list] = asyncio.Queue(2)
-            consumer = asyncio.create_task(process_pages(page_queue, result_queue))
+            consumer = asyncio.create_task(
+                process_pages(page_queue, result_queue, prefix)
+            )
             try:
                 await get_pages(it, page_queue)
                 await consumer
@@ -137,14 +139,16 @@ class ClientS3(Client):
             source=self.uri,
             path=v["Key"],
             etag=v.get("ETag", "").strip('"'),
-            version=ClientS3.clean_s3_version(v.get("VersionId", "")),
+            version=(
+                ClientS3.clean_s3_version(v.get("VersionId", "")) if versions else ""
+            ),
             is_latest=v.get("IsLatest", True),
             last_modified=v.get("LastModified", ""),
             size=v["Size"],
         )

     @classmethod
-    def version_path(cls, path: str, version_id: Optional[str]) -> str:
+    def version_path(cls, path: str, version_id: str | None) -> str:
         parts = list(urlsplit(path))
         query = parse_qs(parts[3])
         if "versionId" in query:
@@ -183,7 +187,7 @@ class ClientS3(Client):
         return subdirs

     @staticmethod
-    def clean_s3_version(ver: Optional[str]) -> str:
+    def clean_s3_version(ver: str | None) -> str:
         return ver if (ver is not None and ver != "null") else ""

     def info_to_file(self, v: dict[str, Any], path: str) -> File:
@@ -191,7 +195,11 @@ class ClientS3(Client):
             source=self.uri,
             path=path,
             size=v["size"],
-            version=ClientS3.clean_s3_version(v.get("VersionId", "")),
+            version=(
+                ClientS3.clean_s3_version(v.get("VersionId", ""))
+                if self._is_version_aware()
+                else ""
+            ),
             etag=v.get("ETag", "").strip('"'),
             is_latest=v.get("IsLatest", True),
             last_modified=v.get("LastModified", ""),
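
`version_path` for S3 carries the object version in a `versionId` query parameter, which is what the `urlsplit`/`parse_qs` machinery in the hunk above manipulates. A minimal sketch of that convention (the real method's handling of an existing `versionId` is cut off in this view, so the completion below is an assumption):

    from urllib.parse import parse_qs, urlencode, urlsplit, urlunsplit

    def version_path(path: str, version_id: str | None) -> str:
        parts = list(urlsplit(path))
        query = parse_qs(parts[3])
        if version_id:
            query["versionId"] = [version_id]  # assumed: replace any existing value
        else:
            query.pop("versionId", None)
        parts[3] = urlencode(query, doseq=True)
        return urlunsplit(parts)

    assert version_path("bucket/key.csv", "abc123") == "bucket/key.csv?versionId=abc123"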
datachain/config.py
CHANGED

@@ -1,7 +1,6 @@
 from collections.abc import Mapping
 from contextlib import contextmanager
 from enum import Enum
-from typing import Optional, Union

 from tomlkit import TOMLDocument, dump, load

@@ -22,16 +21,13 @@ class Config:
     # In the order of precedence
     LEVELS = SYSTEM_LEVELS + LOCAL_LEVELS

-    def __init__(
-        self,
-        level: Optional[ConfigLevel] = None,
-    ):
+    def __init__(self, level: ConfigLevel | None = None):
         self.level = level

         self.init()

     @classmethod
-    def get_dir(cls, level: Optional[ConfigLevel]) -> str:
+    def get_dir(cls, level: ConfigLevel | None) -> str:
         if level == ConfigLevel.SYSTEM:
             return system_config_dir()
         if level == ConfigLevel.GLOBAL:
@@ -43,7 +39,7 @@ class Config:
         d = DataChainDir(self.get_dir(self.level))
         d.init()

-    def load_one(self, level: Optional[ConfigLevel] = None) -> TOMLDocument:
+    def load_one(self, level: ConfigLevel | None = None) -> TOMLDocument:
         config_path = DataChainDir(self.get_dir(level)).config

         try:
@@ -128,7 +124,7 @@ class Config:
         return remote_conf


-def merge(into: Union[TOMLDocument, dict], update: Union[TOMLDocument, dict]):
+def merge(into: TOMLDocument | dict, update: TOMLDocument | dict):
     """Merges second dict into first recursively"""
     for key, val in update.items():
         if isinstance(into.get(key), dict) and isinstance(val, dict):
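
`merge` recurses only when both sides hold a dict for the same key; everything else is overwritten by `update`. A runnable sketch (the recursive call is inferred, since the hunk cuts off after the isinstance check; the config values are made up):

    def merge(into: dict, update: dict) -> None:
        for key, val in update.items():
            if isinstance(into.get(key), dict) and isinstance(val, dict):
                merge(into[key], val)  # assumed recursion on nested dicts
            else:
                into[key] = val

    conf = {"studio": {"url": "https://studio.example.com", "token": "old"}}
    merge(conf, {"studio": {"token": "new"}})
    assert conf == {"studio": {"url": "https://studio.example.com", "token": "new"}}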
datachain/data_storage/db_engine.py
CHANGED

@@ -1,7 +1,7 @@
 import logging
 from abc import ABC, abstractmethod
 from collections.abc import Iterator
-from typing import TYPE_CHECKING, Any, ClassVar, Optional, Union
+from typing import TYPE_CHECKING, Any, ClassVar

 import sqlalchemy as sa
 from sqlalchemy.sql import FROM_LINTING
@@ -58,7 +58,7 @@ class DatabaseEngine(ABC, Serializable):
     @classmethod
     def compile_to_args(
         cls, statement: "ClauseElement", **kwargs
-    ) -> Union[tuple[str], tuple[str, dict[str, Any]]]:
+    ) -> tuple[str] | tuple[str, dict[str, Any]]:
         """
         Compile a sqlalchemy query or ddl object to an args tuple.

@@ -75,8 +75,8 @@ class DatabaseEngine(ABC, Serializable):
     def execute(
         self,
         query,
-        cursor: Optional[Any] = None,
-        conn: Optional[Any] = None,
+        cursor: Any | None = None,
+        conn: Any | None = None,
     ) -> Iterator[tuple[Any, ...]]: ...

     def get_table(self, name: str) -> "Table":
@@ -90,7 +90,7 @@ class DatabaseEngine(ABC, Serializable):

     @abstractmethod
     def executemany(
-        self, query, params, cursor: Optional[Any] = None
+        self, query, params, cursor: Any | None = None
     ) -> Iterator[tuple[Any, ...]]: ...

     @abstractmethod
@@ -112,7 +112,13 @@ class DatabaseEngine(ABC, Serializable):
         return sa.inspect(self.engine).has_table(name)

     @abstractmethod
-    def create_table(self, table: "Table", if_not_exists: bool = True) -> None: ...
+    def create_table(
+        self,
+        table: "Table",
+        if_not_exists: bool = True,
+        *,
+        kind: str | None = None,
+    ) -> None: ...

     @abstractmethod
     def drop_table(self, table: "Table", if_exists: bool = False) -> None: ...
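
For context on `compile_to_args`: compiling a SQLAlchemy statement yields a SQL string plus, when the statement carries bound values, a params mapping, which is exactly the tuple shape in the new return annotation. An illustrative sketch (the table and dialect are arbitrary):

    import sqlalchemy as sa
    from sqlalchemy.dialects import sqlite

    users = sa.table("users", sa.column("id"), sa.column("name"))
    stmt = sa.select(users).where(users.c.id == 42)
    compiled = stmt.compile(dialect=sqlite.dialect())
    print(str(compiled))    # SELECT users.id, users.name FROM users WHERE users.id = ?
    print(compiled.params)  # {'id_1': 42}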
datachain/data_storage/job.py
CHANGED

@@ -3,6 +3,8 @@ from enum import Enum

 class JobStatus(int, Enum):
     CREATED = 1
+    SCHEDULED = 10
+    PROVISIONING = 12
     QUEUED = 2
     INIT = 3
     RUNNING = 4
@@ -11,10 +13,12 @@ class JobStatus(int, Enum):
     CANCELING = 7
     CANCELED = 8
     CANCELING_SCHEDULED = 9
+    TASK = 11
+    PENDING = 13

     @classmethod
     def finished(cls) -> tuple[int, ...]:
-        return cls.COMPLETE, cls.FAILED, cls.CANCELED
+        return cls.COMPLETE, cls.FAILED, cls.CANCELED, cls.TASK


 class JobQueryType(int, Enum):
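
One consequence of the `finished()` change: the new TASK status is treated as terminal, so any polling loop built on `finished()` stops for TASK jobs just as it does for COMPLETE, FAILED, or CANCELED. A small sketch, assuming the enum above:

    def is_finished(status: JobStatus) -> bool:
        return status in JobStatus.finished()

    assert is_finished(JobStatus.TASK)
    assert not is_finished(JobStatus.RUNNING)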