datachain 0.34.6__py3-none-any.whl → 0.34.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- datachain/asyn.py +11 -12
- datachain/cache.py +5 -5
- datachain/catalog/catalog.py +75 -83
- datachain/catalog/loader.py +3 -3
- datachain/checkpoint.py +1 -2
- datachain/cli/__init__.py +2 -4
- datachain/cli/commands/datasets.py +13 -13
- datachain/cli/commands/ls.py +4 -4
- datachain/cli/commands/query.py +3 -3
- datachain/cli/commands/show.py +2 -2
- datachain/cli/parser/job.py +1 -1
- datachain/cli/parser/utils.py +1 -2
- datachain/cli/utils.py +1 -2
- datachain/client/azure.py +2 -2
- datachain/client/fsspec.py +11 -21
- datachain/client/gcs.py +3 -3
- datachain/client/http.py +4 -4
- datachain/client/local.py +4 -4
- datachain/client/s3.py +3 -3
- datachain/config.py +4 -8
- datachain/data_storage/db_engine.py +5 -5
- datachain/data_storage/metastore.py +107 -107
- datachain/data_storage/schema.py +18 -24
- datachain/data_storage/sqlite.py +21 -28
- datachain/data_storage/warehouse.py +13 -13
- datachain/dataset.py +64 -70
- datachain/delta.py +21 -18
- datachain/diff/__init__.py +13 -13
- datachain/func/aggregate.py +9 -11
- datachain/func/array.py +12 -12
- datachain/func/base.py +7 -4
- datachain/func/conditional.py +9 -13
- datachain/func/func.py +45 -42
- datachain/func/numeric.py +5 -7
- datachain/func/string.py +2 -2
- datachain/hash_utils.py +54 -81
- datachain/job.py +8 -8
- datachain/lib/arrow.py +17 -14
- datachain/lib/audio.py +6 -6
- datachain/lib/clip.py +5 -4
- datachain/lib/convert/python_to_sql.py +4 -22
- datachain/lib/convert/values_to_tuples.py +4 -9
- datachain/lib/data_model.py +20 -19
- datachain/lib/dataset_info.py +6 -6
- datachain/lib/dc/csv.py +10 -10
- datachain/lib/dc/database.py +28 -29
- datachain/lib/dc/datachain.py +98 -97
- datachain/lib/dc/datasets.py +22 -22
- datachain/lib/dc/hf.py +4 -4
- datachain/lib/dc/json.py +9 -10
- datachain/lib/dc/listings.py +5 -8
- datachain/lib/dc/pandas.py +3 -6
- datachain/lib/dc/parquet.py +5 -5
- datachain/lib/dc/records.py +5 -5
- datachain/lib/dc/storage.py +12 -12
- datachain/lib/dc/storage_pattern.py +2 -2
- datachain/lib/dc/utils.py +11 -14
- datachain/lib/dc/values.py +3 -6
- datachain/lib/file.py +26 -26
- datachain/lib/hf.py +7 -5
- datachain/lib/image.py +13 -13
- datachain/lib/listing.py +5 -5
- datachain/lib/listing_info.py +1 -2
- datachain/lib/meta_formats.py +1 -2
- datachain/lib/model_store.py +3 -3
- datachain/lib/namespaces.py +4 -6
- datachain/lib/projects.py +5 -9
- datachain/lib/pytorch.py +10 -10
- datachain/lib/settings.py +23 -23
- datachain/lib/signal_schema.py +52 -44
- datachain/lib/text.py +8 -7
- datachain/lib/udf.py +25 -17
- datachain/lib/udf_signature.py +11 -11
- datachain/lib/video.py +3 -4
- datachain/lib/webdataset.py +30 -35
- datachain/lib/webdataset_laion.py +15 -16
- datachain/listing.py +4 -4
- datachain/model/bbox.py +3 -1
- datachain/namespace.py +4 -4
- datachain/node.py +6 -6
- datachain/nodes_thread_pool.py +0 -1
- datachain/plugins.py +1 -7
- datachain/project.py +4 -4
- datachain/query/batch.py +7 -8
- datachain/query/dataset.py +80 -87
- datachain/query/dispatch.py +7 -7
- datachain/query/metrics.py +3 -4
- datachain/query/params.py +2 -3
- datachain/query/schema.py +7 -6
- datachain/query/session.py +7 -7
- datachain/query/udf.py +8 -7
- datachain/query/utils.py +3 -5
- datachain/remote/studio.py +33 -39
- datachain/script_meta.py +12 -12
- datachain/sql/sqlite/base.py +6 -9
- datachain/studio.py +30 -30
- datachain/toolkit/split.py +1 -2
- datachain/utils.py +21 -21
- {datachain-0.34.6.dist-info → datachain-0.34.7.dist-info}/METADATA +2 -3
- datachain-0.34.7.dist-info/RECORD +173 -0
- datachain-0.34.6.dist-info/RECORD +0 -173
- {datachain-0.34.6.dist-info → datachain-0.34.7.dist-info}/WHEEL +0 -0
- {datachain-0.34.6.dist-info → datachain-0.34.7.dist-info}/entry_points.txt +0 -0
- {datachain-0.34.6.dist-info → datachain-0.34.7.dist-info}/licenses/LICENSE +0 -0
- {datachain-0.34.6.dist-info → datachain-0.34.7.dist-info}/top_level.txt +0 -0
datachain/cli/commands/ls.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import shlex
|
|
2
2
|
from collections.abc import Iterable, Iterator
|
|
3
3
|
from itertools import chain
|
|
4
|
-
from typing import TYPE_CHECKING
|
|
4
|
+
from typing import TYPE_CHECKING
|
|
5
5
|
|
|
6
6
|
if TYPE_CHECKING:
|
|
7
7
|
from datachain.catalog import Catalog
|
|
@@ -16,7 +16,7 @@ def ls(
|
|
|
16
16
|
studio: bool = False,
|
|
17
17
|
local: bool = False,
|
|
18
18
|
all: bool = True,
|
|
19
|
-
team:
|
|
19
|
+
team: str | None = None,
|
|
20
20
|
**kwargs,
|
|
21
21
|
):
|
|
22
22
|
token = Config().read().get("studio", {}).get("token")
|
|
@@ -32,7 +32,7 @@ def ls(
|
|
|
32
32
|
def ls_local(
|
|
33
33
|
sources,
|
|
34
34
|
long: bool = False,
|
|
35
|
-
catalog:
|
|
35
|
+
catalog: "Catalog | None" = None,
|
|
36
36
|
client_config=None,
|
|
37
37
|
**kwargs,
|
|
38
38
|
):
|
|
@@ -78,7 +78,7 @@ def format_ls_entry(entry: str) -> str:
|
|
|
78
78
|
def ls_remote(
|
|
79
79
|
paths: Iterable[str],
|
|
80
80
|
long: bool = False,
|
|
81
|
-
team:
|
|
81
|
+
team: str | None = None,
|
|
82
82
|
):
|
|
83
83
|
from datachain.node import long_line_str
|
|
84
84
|
from datachain.remote.studio import StudioClient
|
datachain/cli/commands/query.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import os
|
|
2
2
|
import sys
|
|
3
3
|
import traceback
|
|
4
|
-
from typing import TYPE_CHECKING
|
|
4
|
+
from typing import TYPE_CHECKING
|
|
5
5
|
|
|
6
6
|
if TYPE_CHECKING:
|
|
7
7
|
from datachain.catalog import Catalog
|
|
@@ -10,8 +10,8 @@ if TYPE_CHECKING:
|
|
|
10
10
|
def query(
|
|
11
11
|
catalog: "Catalog",
|
|
12
12
|
script: str,
|
|
13
|
-
parallel:
|
|
14
|
-
params:
|
|
13
|
+
parallel: int | None = None,
|
|
14
|
+
params: dict[str, str] | None = None,
|
|
15
15
|
) -> None:
|
|
16
16
|
from datachain.data_storage import JobQueryType, JobStatus
|
|
17
17
|
|
datachain/cli/commands/show.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from collections.abc import Sequence
|
|
2
|
-
from typing import TYPE_CHECKING
|
|
2
|
+
from typing import TYPE_CHECKING
|
|
3
3
|
|
|
4
4
|
from datachain.lib.signal_schema import SignalSchema
|
|
5
5
|
|
|
@@ -10,7 +10,7 @@ if TYPE_CHECKING:
|
|
|
10
10
|
def show(
|
|
11
11
|
catalog: "Catalog",
|
|
12
12
|
name: str,
|
|
13
|
-
version:
|
|
13
|
+
version: str | None = None,
|
|
14
14
|
limit: int = 10,
|
|
15
15
|
offset: int = 0,
|
|
16
16
|
columns: Sequence[str] = (),
|
datachain/cli/parser/job.py
CHANGED
|
@@ -83,7 +83,7 @@ def add_jobs_parser(subparsers, parent_parser) -> None:
|
|
|
83
83
|
studio_run_parser.add_argument(
|
|
84
84
|
"--python-version",
|
|
85
85
|
action="store",
|
|
86
|
-
help="Python version for the job (e.g., 3.
|
|
86
|
+
help="Python version for the job (e.g., 3.10, 3.11, 3.12, 3.13)",
|
|
87
87
|
)
|
|
88
88
|
studio_run_parser.add_argument(
|
|
89
89
|
"--repository",
|
datachain/cli/parser/utils.py
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
from argparse import Action, ArgumentParser, ArgumentTypeError, HelpFormatter
|
|
2
|
-
from typing import Union
|
|
3
2
|
|
|
4
3
|
from datachain.cli.utils import CommaSeparatedArgs
|
|
5
4
|
|
|
@@ -44,7 +43,7 @@ def parse_find_column(column: str) -> str:
|
|
|
44
43
|
)
|
|
45
44
|
|
|
46
45
|
|
|
47
|
-
def add_sources_arg(parser: ArgumentParser, nargs:
|
|
46
|
+
def add_sources_arg(parser: ArgumentParser, nargs: str | int = "+") -> Action:
|
|
48
47
|
return parser.add_argument(
|
|
49
48
|
"sources",
|
|
50
49
|
type=str,
|
datachain/cli/utils.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from argparse import SUPPRESS, Action, ArgumentError, Namespace, _AppendAction
|
|
3
|
-
from typing import Optional
|
|
4
3
|
|
|
5
4
|
from datachain.error import DataChainError
|
|
6
5
|
|
|
@@ -84,7 +83,7 @@ def get_logging_level(args: Namespace) -> int:
|
|
|
84
83
|
return logging.INFO
|
|
85
84
|
|
|
86
85
|
|
|
87
|
-
def determine_flavors(studio: bool, local: bool, all: bool, token:
|
|
86
|
+
def determine_flavors(studio: bool, local: bool, all: bool, token: str | None):
|
|
88
87
|
if studio and not token:
|
|
89
88
|
raise DataChainError(
|
|
90
89
|
"Not logged in to Studio. Log in with 'datachain auth login'."
|
datachain/client/azure.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Any
|
|
1
|
+
from typing import Any
|
|
2
2
|
from urllib.parse import parse_qs, urlsplit, urlunsplit
|
|
3
3
|
|
|
4
4
|
from adlfs import AzureBlobFileSystem
|
|
@@ -73,7 +73,7 @@ class AzureClient(Client):
|
|
|
73
73
|
result_queue.put_nowait(None)
|
|
74
74
|
|
|
75
75
|
@classmethod
|
|
76
|
-
def version_path(cls, path: str, version_id:
|
|
76
|
+
def version_path(cls, path: str, version_id: str | None) -> str:
|
|
77
77
|
parts = list(urlsplit(path))
|
|
78
78
|
query = parse_qs(parts[3])
|
|
79
79
|
if "versionid" in query:
|
datachain/client/fsspec.py
CHANGED
|
@@ -10,15 +10,7 @@ from abc import ABC, abstractmethod
|
|
|
10
10
|
from collections.abc import AsyncIterator, Iterator, Sequence
|
|
11
11
|
from datetime import datetime
|
|
12
12
|
from shutil import copy2
|
|
13
|
-
from typing import
|
|
14
|
-
TYPE_CHECKING,
|
|
15
|
-
Any,
|
|
16
|
-
BinaryIO,
|
|
17
|
-
ClassVar,
|
|
18
|
-
NamedTuple,
|
|
19
|
-
Optional,
|
|
20
|
-
Union,
|
|
21
|
-
)
|
|
13
|
+
from typing import TYPE_CHECKING, Any, BinaryIO, ClassVar, NamedTuple
|
|
22
14
|
from urllib.parse import urlparse
|
|
23
15
|
|
|
24
16
|
from dvc_objects.fs.system import reflink
|
|
@@ -46,7 +38,7 @@ DELIMITER = "/" # Path delimiter.
|
|
|
46
38
|
DATA_SOURCE_URI_PATTERN = re.compile(r"^[\w]+:\/\/.*$")
|
|
47
39
|
CLOUD_STORAGE_PROTOCOLS = {"s3", "gs", "az", "hf"}
|
|
48
40
|
|
|
49
|
-
ResultQueue = asyncio.Queue[
|
|
41
|
+
ResultQueue = asyncio.Queue[Sequence["File"] | None]
|
|
50
42
|
|
|
51
43
|
|
|
52
44
|
def _is_win_local_path(uri: str) -> bool:
|
|
@@ -76,7 +68,7 @@ def get_cloud_schemes() -> list[str]:
|
|
|
76
68
|
class Bucket(NamedTuple):
|
|
77
69
|
name: str
|
|
78
70
|
uri: "StorageURI"
|
|
79
|
-
created:
|
|
71
|
+
created: datetime | None
|
|
80
72
|
|
|
81
73
|
|
|
82
74
|
class Client(ABC):
|
|
@@ -88,12 +80,12 @@ class Client(ABC):
|
|
|
88
80
|
def __init__(self, name: str, fs_kwargs: dict[str, Any], cache: Cache) -> None:
|
|
89
81
|
self.name = name
|
|
90
82
|
self.fs_kwargs = fs_kwargs
|
|
91
|
-
self._fs:
|
|
83
|
+
self._fs: AbstractFileSystem | None = None
|
|
92
84
|
self.cache = cache
|
|
93
85
|
self.uri = self.get_uri(self.name)
|
|
94
86
|
|
|
95
87
|
@staticmethod
|
|
96
|
-
def get_implementation(url:
|
|
88
|
+
def get_implementation(url: str | os.PathLike[str]) -> type["Client"]: # noqa: PLR0911
|
|
97
89
|
from .azure import AzureClient
|
|
98
90
|
from .gcs import GCSClient
|
|
99
91
|
from .hf import HfClient
|
|
@@ -134,9 +126,7 @@ class Client(ABC):
|
|
|
134
126
|
return cls.get_uri(storage_name), rel_path
|
|
135
127
|
|
|
136
128
|
@staticmethod
|
|
137
|
-
def get_client(
|
|
138
|
-
source: Union[str, os.PathLike[str]], cache: Cache, **kwargs
|
|
139
|
-
) -> "Client":
|
|
129
|
+
def get_client(source: str | os.PathLike[str], cache: Cache, **kwargs) -> "Client":
|
|
140
130
|
cls = Client.get_implementation(source)
|
|
141
131
|
storage_url, _ = cls.split_url(os.fspath(source))
|
|
142
132
|
if os.name == "nt":
|
|
@@ -152,7 +142,7 @@ class Client(ABC):
|
|
|
152
142
|
return fs
|
|
153
143
|
|
|
154
144
|
@classmethod
|
|
155
|
-
def version_path(cls, path: str, version_id:
|
|
145
|
+
def version_path(cls, path: str, version_id: str | None) -> str:
|
|
156
146
|
return path
|
|
157
147
|
|
|
158
148
|
@classmethod
|
|
@@ -232,16 +222,16 @@ class Client(ABC):
|
|
|
232
222
|
)
|
|
233
223
|
return self.info_to_file(info, file_path).etag
|
|
234
224
|
|
|
235
|
-
def get_file_info(self, path: str, version_id:
|
|
225
|
+
def get_file_info(self, path: str, version_id: str | None = None) -> "File":
|
|
236
226
|
info = self.fs.info(self.get_full_path(path, version_id), version_id=version_id)
|
|
237
227
|
return self.info_to_file(info, path)
|
|
238
228
|
|
|
239
|
-
async def get_size(self, path: str, version_id:
|
|
229
|
+
async def get_size(self, path: str, version_id: str | None = None) -> int:
|
|
240
230
|
return await self.fs._size(
|
|
241
231
|
self.version_path(path, version_id), version_id=version_id
|
|
242
232
|
)
|
|
243
233
|
|
|
244
|
-
async def get_file(self, lpath, rpath, callback, version_id:
|
|
234
|
+
async def get_file(self, lpath, rpath, callback, version_id: str | None = None):
|
|
245
235
|
return await self.fs._get_file(
|
|
246
236
|
self.version_path(lpath, version_id),
|
|
247
237
|
rpath,
|
|
@@ -355,7 +345,7 @@ class Client(ABC):
|
|
|
355
345
|
def rel_path(self, path: str) -> str:
|
|
356
346
|
return self.fs.split_path(path)[1]
|
|
357
347
|
|
|
358
|
-
def get_full_path(self, rel_path: str, version_id:
|
|
348
|
+
def get_full_path(self, rel_path: str, version_id: str | None = None) -> str:
|
|
359
349
|
return self.version_path(f"{self.PREFIX}{self.name}/{rel_path}", version_id)
|
|
360
350
|
|
|
361
351
|
@abstractmethod
|
datachain/client/gcs.py
CHANGED
|
@@ -3,7 +3,7 @@ import json
|
|
|
3
3
|
import os
|
|
4
4
|
from collections.abc import Iterable
|
|
5
5
|
from datetime import datetime
|
|
6
|
-
from typing import Any,
|
|
6
|
+
from typing import Any, cast
|
|
7
7
|
|
|
8
8
|
from dateutil.parser import isoparse
|
|
9
9
|
from gcsfs import GCSFileSystem
|
|
@@ -15,7 +15,7 @@ from .fsspec import DELIMITER, Client, ResultQueue
|
|
|
15
15
|
|
|
16
16
|
# Patch gcsfs for consistency with s3fs
|
|
17
17
|
GCSFileSystem.set_session = GCSFileSystem._set_session
|
|
18
|
-
PageQueue = asyncio.Queue[
|
|
18
|
+
PageQueue = asyncio.Queue[Iterable[dict[str, Any]] | None]
|
|
19
19
|
|
|
20
20
|
|
|
21
21
|
class GCSClient(Client):
|
|
@@ -141,5 +141,5 @@ class GCSClient(Client):
|
|
|
141
141
|
)
|
|
142
142
|
|
|
143
143
|
@classmethod
|
|
144
|
-
def version_path(cls, path: str, version_id:
|
|
144
|
+
def version_path(cls, path: str, version_id: str | None) -> str:
|
|
145
145
|
return f"{path}#{version_id}" if version_id else path
|
datachain/client/http.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from datetime import datetime, timezone
|
|
2
|
-
from typing import TYPE_CHECKING, Any, ClassVar,
|
|
2
|
+
from typing import TYPE_CHECKING, Any, ClassVar, cast
|
|
3
3
|
from urllib.parse import urlparse
|
|
4
4
|
|
|
5
5
|
from fsspec.implementations.http import HTTPFileSystem
|
|
@@ -70,7 +70,7 @@ class HTTPClient(Client):
|
|
|
70
70
|
parsed = urlparse(url)
|
|
71
71
|
return parsed.path in ("", "/") and not parsed.query and not parsed.fragment
|
|
72
72
|
|
|
73
|
-
def get_full_path(self, rel_path: str, version_id:
|
|
73
|
+
def get_full_path(self, rel_path: str, version_id: str | None = None) -> str:
|
|
74
74
|
if self.name.startswith(("http://", "https://")):
|
|
75
75
|
base_url = self.name
|
|
76
76
|
else:
|
|
@@ -128,7 +128,7 @@ class HTTPClient(Client):
|
|
|
128
128
|
"HTTP/HTTPS client is read-only. Upload operations are not supported."
|
|
129
129
|
)
|
|
130
130
|
|
|
131
|
-
def get_file_info(self, path: str, version_id:
|
|
131
|
+
def get_file_info(self, path: str, version_id: str | None = None) -> "File":
|
|
132
132
|
info = self.fs.info(self.get_full_path(path))
|
|
133
133
|
return self.info_to_file(info, path)
|
|
134
134
|
|
|
@@ -144,7 +144,7 @@ class HTTPClient(Client):
|
|
|
144
144
|
cb or (lambda x: None),
|
|
145
145
|
)
|
|
146
146
|
|
|
147
|
-
async def get_file(self, lpath, rpath, callback, version_id:
|
|
147
|
+
async def get_file(self, lpath, rpath, callback, version_id: str | None = None):
|
|
148
148
|
return await self.fs._get_file(lpath, rpath, callback=callback)
|
|
149
149
|
|
|
150
150
|
async def _fetch_dir(self, prefix: str, pbar, result_queue) -> set[str]:
|
datachain/client/local.py
CHANGED
|
@@ -2,7 +2,7 @@ import os
|
|
|
2
2
|
import posixpath
|
|
3
3
|
from datetime import datetime, timezone
|
|
4
4
|
from pathlib import Path
|
|
5
|
-
from typing import TYPE_CHECKING, Any
|
|
5
|
+
from typing import TYPE_CHECKING, Any
|
|
6
6
|
from urllib.parse import urlparse
|
|
7
7
|
|
|
8
8
|
from fsspec.implementations.local import LocalFileSystem
|
|
@@ -102,10 +102,10 @@ class FileClient(Client):
|
|
|
102
102
|
info = self.fs.info(self.get_full_path(file.get_path_normalized()))
|
|
103
103
|
return self.info_to_file(info, "").etag
|
|
104
104
|
|
|
105
|
-
async def get_size(self, path: str, version_id:
|
|
105
|
+
async def get_size(self, path: str, version_id: str | None = None) -> int:
|
|
106
106
|
return self.fs.size(path)
|
|
107
107
|
|
|
108
|
-
async def get_file(self, lpath, rpath, callback, version_id:
|
|
108
|
+
async def get_file(self, lpath, rpath, callback, version_id: str | None = None):
|
|
109
109
|
return self.fs.get_file(lpath, rpath, callback=callback)
|
|
110
110
|
|
|
111
111
|
async def ls_dir(self, path):
|
|
@@ -114,7 +114,7 @@ class FileClient(Client):
|
|
|
114
114
|
def rel_path(self, path):
|
|
115
115
|
return posixpath.relpath(path, self.name)
|
|
116
116
|
|
|
117
|
-
def get_full_path(self, rel_path, version_id:
|
|
117
|
+
def get_full_path(self, rel_path, version_id: str | None = None):
|
|
118
118
|
full_path = Path(self.name, rel_path).as_posix()
|
|
119
119
|
if rel_path.endswith("/") or not rel_path:
|
|
120
120
|
full_path += "/"
|
datachain/client/s3.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import asyncio
|
|
2
2
|
import os
|
|
3
|
-
from typing import Any,
|
|
3
|
+
from typing import Any, cast
|
|
4
4
|
from urllib.parse import parse_qs, urlsplit, urlunsplit
|
|
5
5
|
|
|
6
6
|
from botocore.exceptions import NoCredentialsError
|
|
@@ -148,7 +148,7 @@ class ClientS3(Client):
|
|
|
148
148
|
)
|
|
149
149
|
|
|
150
150
|
@classmethod
|
|
151
|
-
def version_path(cls, path: str, version_id:
|
|
151
|
+
def version_path(cls, path: str, version_id: str | None) -> str:
|
|
152
152
|
parts = list(urlsplit(path))
|
|
153
153
|
query = parse_qs(parts[3])
|
|
154
154
|
if "versionId" in query:
|
|
@@ -187,7 +187,7 @@ class ClientS3(Client):
|
|
|
187
187
|
return subdirs
|
|
188
188
|
|
|
189
189
|
@staticmethod
|
|
190
|
-
def clean_s3_version(ver:
|
|
190
|
+
def clean_s3_version(ver: str | None) -> str:
|
|
191
191
|
return ver if (ver is not None and ver != "null") else ""
|
|
192
192
|
|
|
193
193
|
def info_to_file(self, v: dict[str, Any], path: str) -> File:
|
datachain/config.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
from collections.abc import Mapping
|
|
2
2
|
from contextlib import contextmanager
|
|
3
3
|
from enum import Enum
|
|
4
|
-
from typing import Optional, Union
|
|
5
4
|
|
|
6
5
|
from tomlkit import TOMLDocument, dump, load
|
|
7
6
|
|
|
@@ -22,16 +21,13 @@ class Config:
|
|
|
22
21
|
# In the order of precedence
|
|
23
22
|
LEVELS = SYSTEM_LEVELS + LOCAL_LEVELS
|
|
24
23
|
|
|
25
|
-
def __init__(
|
|
26
|
-
self,
|
|
27
|
-
level: Optional[ConfigLevel] = None,
|
|
28
|
-
):
|
|
24
|
+
def __init__(self, level: ConfigLevel | None = None):
|
|
29
25
|
self.level = level
|
|
30
26
|
|
|
31
27
|
self.init()
|
|
32
28
|
|
|
33
29
|
@classmethod
|
|
34
|
-
def get_dir(cls, level:
|
|
30
|
+
def get_dir(cls, level: ConfigLevel | None) -> str:
|
|
35
31
|
if level == ConfigLevel.SYSTEM:
|
|
36
32
|
return system_config_dir()
|
|
37
33
|
if level == ConfigLevel.GLOBAL:
|
|
@@ -43,7 +39,7 @@ class Config:
|
|
|
43
39
|
d = DataChainDir(self.get_dir(self.level))
|
|
44
40
|
d.init()
|
|
45
41
|
|
|
46
|
-
def load_one(self, level:
|
|
42
|
+
def load_one(self, level: ConfigLevel | None = None) -> TOMLDocument:
|
|
47
43
|
config_path = DataChainDir(self.get_dir(level)).config
|
|
48
44
|
|
|
49
45
|
try:
|
|
@@ -128,7 +124,7 @@ class Config:
|
|
|
128
124
|
return remote_conf
|
|
129
125
|
|
|
130
126
|
|
|
131
|
-
def merge(into:
|
|
127
|
+
def merge(into: TOMLDocument | dict, update: TOMLDocument | dict):
|
|
132
128
|
"""Merges second dict into first recursively"""
|
|
133
129
|
for key, val in update.items():
|
|
134
130
|
if isinstance(into.get(key), dict) and isinstance(val, dict):
|
|
datachain/data_storage/db_engine.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from abc import ABC, abstractmethod
|
|
3
3
|
from collections.abc import Iterator
|
|
4
|
-
from typing import TYPE_CHECKING, Any, ClassVar
|
|
4
|
+
from typing import TYPE_CHECKING, Any, ClassVar
|
|
5
5
|
|
|
6
6
|
import sqlalchemy as sa
|
|
7
7
|
from sqlalchemy.sql import FROM_LINTING
|
|
@@ -58,7 +58,7 @@ class DatabaseEngine(ABC, Serializable):
|
|
|
58
58
|
@classmethod
|
|
59
59
|
def compile_to_args(
|
|
60
60
|
cls, statement: "ClauseElement", **kwargs
|
|
61
|
-
) ->
|
|
61
|
+
) -> tuple[str] | tuple[str, dict[str, Any]]:
|
|
62
62
|
"""
|
|
63
63
|
Compile a sqlalchemy query or ddl object to an args tuple.
|
|
64
64
|
|
|
@@ -75,8 +75,8 @@ class DatabaseEngine(ABC, Serializable):
|
|
|
75
75
|
def execute(
|
|
76
76
|
self,
|
|
77
77
|
query,
|
|
78
|
-
cursor:
|
|
79
|
-
conn:
|
|
78
|
+
cursor: Any | None = None,
|
|
79
|
+
conn: Any | None = None,
|
|
80
80
|
) -> Iterator[tuple[Any, ...]]: ...
|
|
81
81
|
|
|
82
82
|
def get_table(self, name: str) -> "Table":
|
|
@@ -90,7 +90,7 @@ class DatabaseEngine(ABC, Serializable):
|
|
|
90
90
|
|
|
91
91
|
@abstractmethod
|
|
92
92
|
def executemany(
|
|
93
|
-
self, query, params, cursor:
|
|
93
|
+
self, query, params, cursor: Any | None = None
|
|
94
94
|
) -> Iterator[tuple[Any, ...]]: ...
|
|
95
95
|
|
|
96
96
|
@abstractmethod
|