datachain 0.19.1__py3-none-any.whl → 0.20.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- datachain/__init__.py +3 -0
- datachain/catalog/catalog.py +180 -65
- datachain/cli/__init__.py +0 -7
- datachain/cli/commands/datasets.py +43 -28
- datachain/cli/parser/__init__.py +1 -35
- datachain/cli/parser/job.py +25 -0
- datachain/cli/parser/studio.py +11 -4
- datachain/data_storage/metastore.py +390 -37
- datachain/data_storage/schema.py +23 -1
- datachain/data_storage/sqlite.py +139 -7
- datachain/data_storage/warehouse.py +26 -7
- datachain/dataset.py +125 -12
- datachain/delta.py +9 -5
- datachain/error.py +36 -0
- datachain/lib/dataset_info.py +4 -0
- datachain/lib/dc/datachain.py +86 -7
- datachain/lib/dc/datasets.py +62 -12
- datachain/lib/dc/listings.py +111 -0
- datachain/lib/dc/records.py +1 -0
- datachain/lib/dc/storage.py +14 -2
- datachain/lib/listing.py +3 -1
- datachain/lib/namespaces.py +73 -0
- datachain/lib/projects.py +86 -0
- datachain/lib/settings.py +10 -0
- datachain/listing.py +3 -1
- datachain/namespace.py +65 -0
- datachain/project.py +78 -0
- datachain/query/dataset.py +71 -46
- datachain/query/session.py +1 -1
- datachain/remote/studio.py +67 -26
- datachain/studio.py +68 -8
- {datachain-0.19.1.dist-info → datachain-0.20.0.dist-info}/METADATA +2 -2
- {datachain-0.19.1.dist-info → datachain-0.20.0.dist-info}/RECORD +37 -33
- {datachain-0.19.1.dist-info → datachain-0.20.0.dist-info}/WHEEL +0 -0
- {datachain-0.19.1.dist-info → datachain-0.20.0.dist-info}/entry_points.txt +0 -0
- {datachain-0.19.1.dist-info → datachain-0.20.0.dist-info}/licenses/LICENSE +0 -0
- {datachain-0.19.1.dist-info → datachain-0.20.0.dist-info}/top_level.txt +0 -0
datachain/remote/studio.py
CHANGED
|
@@ -17,6 +17,7 @@ import websockets
|
|
|
17
17
|
from requests.exceptions import HTTPError, Timeout
|
|
18
18
|
|
|
19
19
|
from datachain.config import Config
|
|
20
|
+
from datachain.dataset import DatasetRecord
|
|
20
21
|
from datachain.error import DataChainError
|
|
21
22
|
from datachain.utils import STUDIO_URL, retry_with_backoff
|
|
22
23
|
|
|
@@ -30,18 +31,39 @@ DatasetExportSignedUrls = Optional[list[str]]
|
|
|
30
31
|
FileUploadData = Optional[dict[str, Any]]
|
|
31
32
|
JobData = Optional[dict[str, Any]]
|
|
32
33
|
JobListData = dict[str, Any]
|
|
34
|
+
ClusterListData = dict[str, Any]
|
|
33
35
|
logger = logging.getLogger("datachain")
|
|
34
36
|
|
|
35
37
|
DATASET_ROWS_CHUNK_SIZE = 8192
|
|
36
38
|
|
|
37
39
|
|
|
40
|
+
def get_studio_env_variable(name: str) -> Any:
|
|
41
|
+
"""
|
|
42
|
+
Get the value of a DataChain Studio environment variable.
|
|
43
|
+
It first checks for the variable prefixed with 'DATACHAIN_STUDIO_',
|
|
44
|
+
then checks for the deprecated 'DVC_STUDIO_' prefix.
|
|
45
|
+
If neither is set, it returns None.
|
|
46
|
+
"""
|
|
47
|
+
if (value := os.environ.get(f"DATACHAIN_STUDIO_{name}")) is not None:
|
|
48
|
+
return value
|
|
49
|
+
if (value := os.environ.get(f"DVC_STUDIO_{name}")) is not None: # deprecated
|
|
50
|
+
logger.warning(
|
|
51
|
+
"Environment variable 'DVC_STUDIO_%s' is deprecated, "
|
|
52
|
+
"use 'DATACHAIN_STUDIO_%s' instead.",
|
|
53
|
+
name,
|
|
54
|
+
name,
|
|
55
|
+
)
|
|
56
|
+
return value
|
|
57
|
+
return None
|
|
58
|
+
|
|
59
|
+
|
|
38
60
|
def _is_server_error(status_code: int) -> bool:
|
|
39
61
|
return str(status_code).startswith("5")
|
|
40
62
|
|
|
41
63
|
|
|
42
64
|
def is_token_set() -> bool:
|
|
43
65
|
return (
|
|
44
|
-
bool(
|
|
66
|
+
bool(get_studio_env_variable("TOKEN"))
|
|
45
67
|
or Config().read().get("studio", {}).get("token") is not None
|
|
46
68
|
)
|
|
47
69
|
|
|
@@ -77,12 +99,12 @@ class StudioClient:
|
|
|
77
99
|
|
|
78
100
|
@property
|
|
79
101
|
def token(self) -> str:
|
|
80
|
-
token =
|
|
102
|
+
token = get_studio_env_variable("TOKEN") or self.config.get("token")
|
|
81
103
|
|
|
82
104
|
if not token:
|
|
83
105
|
raise DataChainError(
|
|
84
106
|
"Studio token is not set. Use `datachain auth login` "
|
|
85
|
-
"or environment variable `
|
|
107
|
+
"or environment variable `DATACHAIN_STUDIO_TOKEN` to set it."
|
|
86
108
|
)
|
|
87
109
|
|
|
88
110
|
return token
|
|
@@ -90,8 +112,8 @@ class StudioClient:
|
|
|
90
112
|
@property
|
|
91
113
|
def url(self) -> str:
|
|
92
114
|
return (
|
|
93
|
-
|
|
94
|
-
) + "/api"
|
|
115
|
+
get_studio_env_variable("URL") or self.config.get("url") or STUDIO_URL
|
|
116
|
+
).rstrip("/") + "/api"
|
|
95
117
|
|
|
96
118
|
@property
|
|
97
119
|
def config(self) -> dict:
|
|
@@ -106,13 +128,13 @@ class StudioClient:
|
|
|
106
128
|
return self._team
|
|
107
129
|
|
|
108
130
|
def _get_team(self) -> str:
|
|
109
|
-
team =
|
|
131
|
+
team = get_studio_env_variable("TEAM") or self.config.get("team")
|
|
110
132
|
|
|
111
133
|
if not team:
|
|
112
134
|
raise DataChainError(
|
|
113
135
|
"Studio team is not set. "
|
|
114
136
|
"Use `datachain auth team <team_name>` "
|
|
115
|
-
"or environment variable `
|
|
137
|
+
"or environment variable `DATACHAIN_STUDIO_TEAM` to set it. "
|
|
116
138
|
"You can also set `studio.team` in the config file."
|
|
117
139
|
)
|
|
118
140
|
|
|
@@ -290,13 +312,17 @@ class StudioClient:
|
|
|
290
312
|
def edit_dataset(
|
|
291
313
|
self,
|
|
292
314
|
name: str,
|
|
315
|
+
namespace: str,
|
|
316
|
+
project: str,
|
|
293
317
|
new_name: Optional[str] = None,
|
|
294
318
|
description: Optional[str] = None,
|
|
295
319
|
attrs: Optional[list[str]] = None,
|
|
296
320
|
) -> Response[DatasetInfoData]:
|
|
297
321
|
body = {
|
|
298
322
|
"new_name": new_name,
|
|
299
|
-
"
|
|
323
|
+
"name": name,
|
|
324
|
+
"namespace": namespace,
|
|
325
|
+
"project": project,
|
|
300
326
|
"description": description,
|
|
301
327
|
"attrs": attrs,
|
|
302
328
|
}
|
|
@@ -309,44 +335,44 @@ class StudioClient:
|
|
|
309
335
|
def rm_dataset(
|
|
310
336
|
self,
|
|
311
337
|
name: str,
|
|
338
|
+
namespace: str,
|
|
339
|
+
project: str,
|
|
312
340
|
version: Optional[str] = None,
|
|
313
341
|
force: Optional[bool] = False,
|
|
314
342
|
) -> Response[DatasetInfoData]:
|
|
315
343
|
return self._send_request(
|
|
316
344
|
"datachain/datasets",
|
|
317
345
|
{
|
|
318
|
-
"
|
|
319
|
-
"
|
|
346
|
+
"name": name,
|
|
347
|
+
"namespace": namespace,
|
|
348
|
+
"project": project,
|
|
349
|
+
"version": version,
|
|
320
350
|
"force": force,
|
|
321
351
|
},
|
|
322
352
|
method="DELETE",
|
|
323
353
|
)
|
|
324
354
|
|
|
325
|
-
def dataset_info(
|
|
355
|
+
def dataset_info(
|
|
356
|
+
self, namespace: str, project: str, name: str
|
|
357
|
+
) -> Response[DatasetInfoData]:
|
|
326
358
|
def _parse_dataset_info(dataset_info):
|
|
327
359
|
_parse_dates(dataset_info, ["created_at", "finished_at"])
|
|
328
360
|
for version in dataset_info.get("versions"):
|
|
329
361
|
_parse_dates(version, ["created_at"])
|
|
362
|
+
_parse_dates(dataset_info.get("project"), ["created_at"])
|
|
363
|
+
_parse_dates(dataset_info.get("project").get("namespace"), ["created_at"])
|
|
330
364
|
|
|
331
365
|
return dataset_info
|
|
332
366
|
|
|
333
367
|
response = self._send_request(
|
|
334
|
-
"datachain/datasets/info",
|
|
368
|
+
"datachain/datasets/info",
|
|
369
|
+
{"namespace": namespace, "project": project, "name": name},
|
|
370
|
+
method="GET",
|
|
335
371
|
)
|
|
336
372
|
if response.ok:
|
|
337
373
|
response.data = _parse_dataset_info(response.data)
|
|
338
374
|
return response
|
|
339
375
|
|
|
340
|
-
def dataset_rows_chunk(
|
|
341
|
-
self, name: str, version: str, offset: int
|
|
342
|
-
) -> Response[DatasetRowsData]:
|
|
343
|
-
req_data = {"dataset_name": name, "dataset_version": version}
|
|
344
|
-
return self._send_request_msgpack(
|
|
345
|
-
"datachain/datasets/rows",
|
|
346
|
-
{**req_data, "offset": offset, "limit": DATASET_ROWS_CHUNK_SIZE},
|
|
347
|
-
method="GET",
|
|
348
|
-
)
|
|
349
|
-
|
|
350
376
|
def dataset_job_versions(self, job_id: str) -> Response[DatasetJobVersionsData]:
|
|
351
377
|
return self._send_request(
|
|
352
378
|
"datachain/datasets/dataset_job_versions",
|
|
@@ -355,20 +381,30 @@ class StudioClient:
|
|
|
355
381
|
)
|
|
356
382
|
|
|
357
383
|
def export_dataset_table(
|
|
358
|
-
self,
|
|
384
|
+
self, dataset: DatasetRecord, version: str
|
|
359
385
|
) -> Response[DatasetExportSignedUrls]:
|
|
360
386
|
return self._send_request(
|
|
361
387
|
"datachain/datasets/export",
|
|
362
|
-
{
|
|
388
|
+
{
|
|
389
|
+
"namespace": dataset.project.namespace.name,
|
|
390
|
+
"project": dataset.project.name,
|
|
391
|
+
"name": dataset.name,
|
|
392
|
+
"version": version,
|
|
393
|
+
},
|
|
363
394
|
method="GET",
|
|
364
395
|
)
|
|
365
396
|
|
|
366
397
|
def dataset_export_status(
|
|
367
|
-
self,
|
|
398
|
+
self, dataset: DatasetRecord, version: str
|
|
368
399
|
) -> Response[DatasetExportStatus]:
|
|
369
400
|
return self._send_request(
|
|
370
401
|
"datachain/datasets/export-status",
|
|
371
|
-
{
|
|
402
|
+
{
|
|
403
|
+
"namespace": dataset.project.namespace.name,
|
|
404
|
+
"project": dataset.project.name,
|
|
405
|
+
"name": dataset.name,
|
|
406
|
+
"version": version,
|
|
407
|
+
},
|
|
372
408
|
method="GET",
|
|
373
409
|
)
|
|
374
410
|
|
|
@@ -391,6 +427,7 @@ class StudioClient:
|
|
|
391
427
|
requirements: Optional[str] = None,
|
|
392
428
|
repository: Optional[str] = None,
|
|
393
429
|
priority: Optional[int] = None,
|
|
430
|
+
cluster_id: Optional[int] = None,
|
|
394
431
|
) -> Response[JobData]:
|
|
395
432
|
data = {
|
|
396
433
|
"query": query,
|
|
@@ -403,6 +440,7 @@ class StudioClient:
|
|
|
403
440
|
"requirements": requirements,
|
|
404
441
|
"repository": repository,
|
|
405
442
|
"priority": priority,
|
|
443
|
+
"compute_cluster_id": cluster_id,
|
|
406
444
|
}
|
|
407
445
|
return self._send_request("datachain/job", data)
|
|
408
446
|
|
|
@@ -423,3 +461,6 @@ class StudioClient:
|
|
|
423
461
|
) -> Response[JobData]:
|
|
424
462
|
url = f"datachain/job/{job_id}/cancel"
|
|
425
463
|
return self._send_request(url, data={}, method="POST")
|
|
464
|
+
|
|
465
|
+
def get_clusters(self) -> Response[ClusterListData]:
|
|
466
|
+
return self._send_request("datachain/clusters", {}, method="GET")
|
datachain/studio.py
CHANGED
|
@@ -6,7 +6,7 @@ from typing import TYPE_CHECKING, Optional
|
|
|
6
6
|
import tabulate
|
|
7
7
|
|
|
8
8
|
from datachain.config import Config, ConfigLevel
|
|
9
|
-
from datachain.dataset import QUERY_DATASET_PREFIX
|
|
9
|
+
from datachain.dataset import QUERY_DATASET_PREFIX, parse_dataset_name
|
|
10
10
|
from datachain.error import DataChainError
|
|
11
11
|
from datachain.remote.studio import StudioClient
|
|
12
12
|
from datachain.utils import STUDIO_URL
|
|
@@ -41,6 +41,7 @@ def process_jobs_args(args: "Namespace"):
|
|
|
41
41
|
args.req,
|
|
42
42
|
args.req_file,
|
|
43
43
|
args.priority,
|
|
44
|
+
args.cluster_id,
|
|
44
45
|
)
|
|
45
46
|
|
|
46
47
|
if args.cmd == "cancel":
|
|
@@ -51,6 +52,9 @@ def process_jobs_args(args: "Namespace"):
|
|
|
51
52
|
if args.cmd == "ls":
|
|
52
53
|
return list_jobs(args.status, args.team, args.limit)
|
|
53
54
|
|
|
55
|
+
if args.cmd == "clusters":
|
|
56
|
+
return list_clusters(args.team)
|
|
57
|
+
|
|
54
58
|
raise DataChainError(f"Unknown command '{args.cmd}'.")
|
|
55
59
|
|
|
56
60
|
|
|
@@ -68,14 +72,24 @@ def process_auth_cli_args(args: "Namespace"):
|
|
|
68
72
|
return logout(args.local)
|
|
69
73
|
if args.cmd == "token":
|
|
70
74
|
return token()
|
|
71
|
-
|
|
72
75
|
if args.cmd == "team":
|
|
73
76
|
return set_team(args)
|
|
74
77
|
raise DataChainError(f"Unknown command '{args.cmd}'.")
|
|
75
78
|
|
|
76
79
|
|
|
77
80
|
def set_team(args: "Namespace"):
|
|
78
|
-
|
|
81
|
+
if args.team_name is None:
|
|
82
|
+
config = Config().read().get("studio", {})
|
|
83
|
+
team = config.get("team")
|
|
84
|
+
if team:
|
|
85
|
+
print(f"Default team is '{team}'")
|
|
86
|
+
return 0
|
|
87
|
+
|
|
88
|
+
raise DataChainError(
|
|
89
|
+
"No default team set. Use `datachain auth team <team_name>` to set one."
|
|
90
|
+
)
|
|
91
|
+
|
|
92
|
+
level = ConfigLevel.LOCAL if args.local else ConfigLevel.GLOBAL
|
|
79
93
|
config = Config(level)
|
|
80
94
|
with config.edit() as conf:
|
|
81
95
|
studio_conf = conf.get("studio", {})
|
|
@@ -88,11 +102,13 @@ def set_team(args: "Namespace"):
|
|
|
88
102
|
def login(args: "Namespace"):
|
|
89
103
|
from dvc_studio_client.auth import StudioAuthError, get_access_token
|
|
90
104
|
|
|
105
|
+
from datachain.remote.studio import get_studio_env_variable
|
|
106
|
+
|
|
91
107
|
config = Config().read().get("studio", {})
|
|
92
108
|
name = args.name
|
|
93
109
|
hostname = (
|
|
94
110
|
args.hostname
|
|
95
|
-
or
|
|
111
|
+
or get_studio_env_variable("URL")
|
|
96
112
|
or config.get("url")
|
|
97
113
|
or STUDIO_URL
|
|
98
114
|
)
|
|
@@ -121,6 +137,7 @@ def login(args: "Namespace"):
|
|
|
121
137
|
level = ConfigLevel.LOCAL if args.local else ConfigLevel.GLOBAL
|
|
122
138
|
config_path = save_config(hostname, access_token, level=level)
|
|
123
139
|
print(f"Authentication complete. Saved token to {config_path}.")
|
|
140
|
+
print("You can now use 'datachain auth team' to set the default team.")
|
|
124
141
|
return 0
|
|
125
142
|
|
|
126
143
|
|
|
@@ -150,6 +167,11 @@ def token():
|
|
|
150
167
|
|
|
151
168
|
|
|
152
169
|
def list_datasets(team: Optional[str] = None, name: Optional[str] = None):
|
|
170
|
+
def ds_full_name(ds: dict) -> str:
|
|
171
|
+
return (
|
|
172
|
+
f"{ds['project']['namespace']['name']}.{ds['project']['name']}.{ds['name']}"
|
|
173
|
+
)
|
|
174
|
+
|
|
153
175
|
if name:
|
|
154
176
|
yield from list_dataset_versions(team, name)
|
|
155
177
|
return
|
|
@@ -166,18 +188,22 @@ def list_datasets(team: Optional[str] = None, name: Optional[str] = None):
|
|
|
166
188
|
|
|
167
189
|
for d in response.data:
|
|
168
190
|
name = d.get("name")
|
|
191
|
+
full_name = ds_full_name(d)
|
|
169
192
|
if name and name.startswith(QUERY_DATASET_PREFIX):
|
|
170
193
|
continue
|
|
171
194
|
|
|
172
195
|
for v in d.get("versions", []):
|
|
173
196
|
version = v.get("version")
|
|
174
|
-
yield (
|
|
197
|
+
yield (full_name, version)
|
|
175
198
|
|
|
176
199
|
|
|
177
200
|
def list_dataset_versions(team: Optional[str] = None, name: str = ""):
|
|
178
201
|
client = StudioClient(team=team)
|
|
179
202
|
|
|
180
|
-
|
|
203
|
+
namespace_name, project_name, name = parse_dataset_name(name)
|
|
204
|
+
if not namespace_name or not project_name:
|
|
205
|
+
raise DataChainError(f"Missing namespace or project from dataset name {name}")
|
|
206
|
+
response = client.dataset_info(namespace_name, project_name, name)
|
|
181
207
|
|
|
182
208
|
if not response.ok:
|
|
183
209
|
raise DataChainError(response.message)
|
|
@@ -193,12 +219,16 @@ def list_dataset_versions(team: Optional[str] = None, name: str = ""):
|
|
|
193
219
|
def edit_studio_dataset(
|
|
194
220
|
team_name: Optional[str],
|
|
195
221
|
name: str,
|
|
222
|
+
namespace: str,
|
|
223
|
+
project: str,
|
|
196
224
|
new_name: Optional[str] = None,
|
|
197
225
|
description: Optional[str] = None,
|
|
198
226
|
attrs: Optional[list[str]] = None,
|
|
199
227
|
):
|
|
200
228
|
client = StudioClient(team=team_name)
|
|
201
|
-
response = client.edit_dataset(
|
|
229
|
+
response = client.edit_dataset(
|
|
230
|
+
name, namespace, project, new_name, description, attrs
|
|
231
|
+
)
|
|
202
232
|
if not response.ok:
|
|
203
233
|
raise DataChainError(response.message)
|
|
204
234
|
|
|
@@ -208,11 +238,13 @@ def edit_studio_dataset(
|
|
|
208
238
|
def remove_studio_dataset(
|
|
209
239
|
team_name: Optional[str],
|
|
210
240
|
name: str,
|
|
241
|
+
namespace: str,
|
|
242
|
+
project: str,
|
|
211
243
|
version: Optional[str] = None,
|
|
212
244
|
force: Optional[bool] = False,
|
|
213
245
|
):
|
|
214
246
|
client = StudioClient(team=team_name)
|
|
215
|
-
response = client.rm_dataset(name, version, force)
|
|
247
|
+
response = client.rm_dataset(name, namespace, project, version, force)
|
|
216
248
|
if not response.ok:
|
|
217
249
|
raise DataChainError(response.message)
|
|
218
250
|
|
|
@@ -268,6 +300,7 @@ def create_job(
|
|
|
268
300
|
req: Optional[list[str]] = None,
|
|
269
301
|
req_file: Optional[str] = None,
|
|
270
302
|
priority: Optional[int] = None,
|
|
303
|
+
cluster_id: Optional[int] = None,
|
|
271
304
|
):
|
|
272
305
|
query_type = "PYTHON" if query_file.endswith(".py") else "SHELL"
|
|
273
306
|
with open(query_file) as f:
|
|
@@ -297,6 +330,7 @@ def create_job(
|
|
|
297
330
|
repository=repository,
|
|
298
331
|
requirements=requirements,
|
|
299
332
|
priority=priority,
|
|
333
|
+
cluster_id=cluster_id,
|
|
300
334
|
)
|
|
301
335
|
if not response.ok:
|
|
302
336
|
raise DataChainError(response.message)
|
|
@@ -380,3 +414,29 @@ def show_job_logs(job_id: str, team_name: Optional[str]):
|
|
|
380
414
|
|
|
381
415
|
client = StudioClient(team=team_name)
|
|
382
416
|
show_logs_from_client(client, job_id)
|
|
417
|
+
|
|
418
|
+
|
|
419
|
+
def list_clusters(team_name: Optional[str]):
|
|
420
|
+
client = StudioClient(team=team_name)
|
|
421
|
+
response = client.get_clusters()
|
|
422
|
+
if not response.ok:
|
|
423
|
+
raise DataChainError(response.message)
|
|
424
|
+
|
|
425
|
+
clusters = response.data.get("clusters", [])
|
|
426
|
+
if not clusters:
|
|
427
|
+
print("No clusters found")
|
|
428
|
+
return
|
|
429
|
+
|
|
430
|
+
rows = [
|
|
431
|
+
{
|
|
432
|
+
"ID": cluster.get("id"),
|
|
433
|
+
"Status": cluster.get("status"),
|
|
434
|
+
"Cloud Provider": cluster.get("cloud_provider"),
|
|
435
|
+
"Cloud Credentials": cluster.get("cloud_credentials"),
|
|
436
|
+
"Is Active": cluster.get("is_active"),
|
|
437
|
+
"Max Workers": cluster.get("max_workers"),
|
|
438
|
+
}
|
|
439
|
+
for cluster in clusters
|
|
440
|
+
]
|
|
441
|
+
|
|
442
|
+
print(tabulate.tabulate(rows, headers="keys", tablefmt="grid"))
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: datachain
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.20.0
|
|
4
4
|
Summary: Wrangle unstructured AI data at scale
|
|
5
5
|
Author-email: Dmitry Petrov <support@dvc.org>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -94,7 +94,7 @@ Requires-Dist: scipy; extra == "tests"
|
|
|
94
94
|
Requires-Dist: ultralytics; extra == "tests"
|
|
95
95
|
Provides-Extra: dev
|
|
96
96
|
Requires-Dist: datachain[docs,tests]; extra == "dev"
|
|
97
|
-
Requires-Dist: mypy==1.16.
|
|
97
|
+
Requires-Dist: mypy==1.16.1; extra == "dev"
|
|
98
98
|
Requires-Dist: types-python-dateutil; extra == "dev"
|
|
99
99
|
Requires-Dist: types-pytz; extra == "dev"
|
|
100
100
|
Requires-Dist: types-PyYAML; extra == "dev"
|
|
@@ -1,40 +1,42 @@
|
|
|
1
|
-
datachain/__init__.py,sha256=
|
|
1
|
+
datachain/__init__.py,sha256=Mq1dyOUSetvR80Swistr84HOuRSIOps6DHuUjAyQffA,1577
|
|
2
2
|
datachain/__main__.py,sha256=hG3Y4ARGEqe1AWwNMd259rBlqtphx1Wk39YbueQ0yV8,91
|
|
3
3
|
datachain/asyn.py,sha256=RH_jFwJcTXxhEFomaI9yL6S3Onau6NZ6FSKfKFGtrJE,9689
|
|
4
4
|
datachain/cache.py,sha256=yQblPhOh_Mq74Ma7xT1CL1idLJ0HgrQxpGVYvRy_9Eg,3623
|
|
5
5
|
datachain/config.py,sha256=g8qbNV0vW2VEKpX-dGZ9pAn0DAz6G2ZFcr7SAV3PoSM,4272
|
|
6
|
-
datachain/dataset.py,sha256=
|
|
7
|
-
datachain/delta.py,sha256=
|
|
8
|
-
datachain/error.py,sha256=
|
|
6
|
+
datachain/dataset.py,sha256=d6b9LKsKBYO40PIxpbMFVRT0TZqaaLpvWNIH-0ladGM,24016
|
|
7
|
+
datachain/delta.py,sha256=v__gty2T-AZYcYAZ5iJ6CBUo7iBGcQF4JxmAPeZIzHw,8452
|
|
8
|
+
datachain/error.py,sha256=OWwWMkzZYJrkcoEDGhJHMf7SfKvxcsOLRF94mjPf29I,1609
|
|
9
9
|
datachain/job.py,sha256=x5PB6d5sqx00hePNNkirESlOVAvnmkEM5ygUgQmAhsk,1262
|
|
10
|
-
datachain/listing.py,sha256=
|
|
10
|
+
datachain/listing.py,sha256=T4bCgdCRuFW7bsPUG2PSl5om2nfJL6fzB84m7mCO8cA,7136
|
|
11
|
+
datachain/namespace.py,sha256=4qb-XsTnx6tFWCTCFmDxzUI1pbum2GKVutfVIjFgAkI,1804
|
|
11
12
|
datachain/node.py,sha256=KWDT0ClYXB7FYI-QOvzAa-UDkLJErUI2eWm5FBteYuU,5577
|
|
12
13
|
datachain/nodes_fetcher.py,sha256=_wgaKyqEjkqdwJ_Hj6D8vUYz7hnU7g6xhm0H6ZnYxmE,1095
|
|
13
14
|
datachain/nodes_thread_pool.py,sha256=mdo0s-VybuSZkRUARcUO4Tjh8KFfZr9foHqmupx2SmM,3989
|
|
14
15
|
datachain/progress.py,sha256=lRzxoYP4Qv2XBwD78sOkmYRzHFpZ2ExVNJF8wAeICtY,770
|
|
16
|
+
datachain/project.py,sha256=FobJbBJIy1-e-yemlL7M5eOcy694eceO5CWuY5H7bbw,2305
|
|
15
17
|
datachain/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
16
18
|
datachain/script_meta.py,sha256=V-LaFOZG84pD0Zc0NvejYdzwDgzITv6yHvAHggDCnuY,4978
|
|
17
19
|
datachain/semver.py,sha256=UB8GHPBtAP3UJGeiuJoInD7SK-DnB93_Xd1qy_CQ9cU,2074
|
|
18
|
-
datachain/studio.py,sha256=
|
|
20
|
+
datachain/studio.py,sha256=xtuRtqokqvBEICveaQ2FNsD43duXxHTcBPrqv-C9t4M,13009
|
|
19
21
|
datachain/telemetry.py,sha256=0A4IOPPp9VlP5pyW9eBfaTK3YhHGzHl7dQudQjUAx9A,994
|
|
20
22
|
datachain/utils.py,sha256=DNqOi-Ydb7InyWvD9m7_yailxz6-YGpZzh00biQaHNo,15305
|
|
21
23
|
datachain/catalog/__init__.py,sha256=cMZzSz3VoUi-6qXSVaHYN-agxQuAcz2XSqnEPZ55crE,353
|
|
22
|
-
datachain/catalog/catalog.py,sha256=
|
|
24
|
+
datachain/catalog/catalog.py,sha256=TjEFu00nwlyUB3wgeNyMfK1ROySdV5B_RtKCbBlbwic,63558
|
|
23
25
|
datachain/catalog/datasource.py,sha256=IkGMh0Ttg6Q-9DWfU_H05WUnZepbGa28HYleECi6K7I,1353
|
|
24
26
|
datachain/catalog/loader.py,sha256=UXjYD6BNRoupPvkiz3-b04jepXhtLHCA4gzKFnXxOtQ,5987
|
|
25
|
-
datachain/cli/__init__.py,sha256=
|
|
27
|
+
datachain/cli/__init__.py,sha256=kJJf_LScBNMOhvd1n3EEZrJHiN-SkJED13xvNTWEK1A,8144
|
|
26
28
|
datachain/cli/utils.py,sha256=wrLnAh7Wx8O_ojZE8AE4Lxn5WoxHbOj7as8NWlLAA74,3036
|
|
27
29
|
datachain/cli/commands/__init__.py,sha256=zp3bYIioO60x_X04A4-IpZqSYVnpwOa1AdERQaRlIhI,493
|
|
28
|
-
datachain/cli/commands/datasets.py,sha256=
|
|
30
|
+
datachain/cli/commands/datasets.py,sha256=Bva9gTi1HMvvCQPFUPxLYrHQduDlJDWV8EN6IcJcC3Y,6949
|
|
29
31
|
datachain/cli/commands/du.py,sha256=9edEzDEs98K2VYk8Wf-ZMpUzALcgm9uD6YtoqbvtUGU,391
|
|
30
32
|
datachain/cli/commands/index.py,sha256=eglNaIe1yyIadUHHumjtNbgIjht6kme7SS7xE3YHR88,198
|
|
31
33
|
datachain/cli/commands/ls.py,sha256=dSD2_MHng4t9HRFJZWMOCjPL4XU3qaBV3piNl8UXP08,5275
|
|
32
34
|
datachain/cli/commands/misc.py,sha256=c0DmkOLwcDI2YhA8ArOuLJk6aGzSMZCiKL_E2JGibVE,600
|
|
33
35
|
datachain/cli/commands/query.py,sha256=Xzfgh14nPVH-sclqX1tpZqgfdTugw5s_44v0D33z6FA,1505
|
|
34
36
|
datachain/cli/commands/show.py,sha256=Cf8wBs12h-xtdOzjU5GTDy2C8rF5HJSF0hDJYER1zH8,1606
|
|
35
|
-
datachain/cli/parser/__init__.py,sha256=
|
|
36
|
-
datachain/cli/parser/job.py,sha256=
|
|
37
|
-
datachain/cli/parser/studio.py,sha256=
|
|
37
|
+
datachain/cli/parser/__init__.py,sha256=NPB6ssP4CCt7G1SWZ_8oNQEH2C1lktWgkyHYXDQJZNc,15073
|
|
38
|
+
datachain/cli/parser/job.py,sha256=9mEkbhXFIGZxDAjZT9lWWIf3G-HiuJd1oTJNjSrEppc,5295
|
|
39
|
+
datachain/cli/parser/studio.py,sha256=Bo__LKM7qhJGgkyX8M_bCvgZ2Gvqq6r_X4t1NdtaBIY,3881
|
|
38
40
|
datachain/cli/parser/utils.py,sha256=rETdD-9Hq9A4OolgfT7jQw4aoawtbfmkdtH6E7nkhpI,2888
|
|
39
41
|
datachain/client/__init__.py,sha256=1kDpCPoibMXi1gExR4lTLc5pi-k6M5TANiwtXkPoLhU,49
|
|
40
42
|
datachain/client/azure.py,sha256=7yyAgANHfu9Kfh187MKNTT1guvu9Q-WYsi4vYoY3aew,3270
|
|
@@ -47,11 +49,11 @@ datachain/client/s3.py,sha256=6DNVGLg-woPS1DVlYVX2rIlunNblsuxyOnI1rSzhW3k,7515
|
|
|
47
49
|
datachain/data_storage/__init__.py,sha256=9Wit-oe5P46V7CJQTD0BJ5MhOa2Y9h3ddJ4VWTe-Lec,273
|
|
48
50
|
datachain/data_storage/db_engine.py,sha256=n8ojCbvVMPY2e3SG8fUaaD0b9GkVfpl_Naa_6EiHfWg,3788
|
|
49
51
|
datachain/data_storage/job.py,sha256=9r0OGwh22bHNIvLHqg8_-eJSP1YYB-BN5HOla5TdCxw,402
|
|
50
|
-
datachain/data_storage/metastore.py,sha256=
|
|
51
|
-
datachain/data_storage/schema.py,sha256=
|
|
52
|
+
datachain/data_storage/metastore.py,sha256=VJOVzXktpeoMp_On9WwjSLqx1e7RllYiTcPw86uD1lY,51211
|
|
53
|
+
datachain/data_storage/schema.py,sha256=o3JbURKXRg3IJyIVA4QjHHkn6byRuz7avbydU2FlvNY,9897
|
|
52
54
|
datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2krunWUum5o,927
|
|
53
|
-
datachain/data_storage/sqlite.py,sha256=
|
|
54
|
-
datachain/data_storage/warehouse.py,sha256=
|
|
55
|
+
datachain/data_storage/sqlite.py,sha256=H3dpJhqz4y_hde5efR3721GXiDftRGVUeDfN7FuCguk,30166
|
|
56
|
+
datachain/data_storage/warehouse.py,sha256=_7btARw-kd-Nx19S0qW6JqdF3VYyypQXFzsXq68SWKI,32327
|
|
55
57
|
datachain/diff/__init__.py,sha256=-OFZzgOplqO84iWgGY7kfe60NXaWR9JRIh9T-uJboAM,9668
|
|
56
58
|
datachain/fs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
57
59
|
datachain/fs/reference.py,sha256=A8McpXF0CqbXPqanXuvpKu50YLB3a2ZXA3YAPxtBXSM,914
|
|
@@ -71,16 +73,18 @@ datachain/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
|
71
73
|
datachain/lib/arrow.py,sha256=2IuNZ6tRFsxVNhWElqr0ptz28geSDzlDHUtzD4qeDNM,10339
|
|
72
74
|
datachain/lib/clip.py,sha256=lm5CzVi4Cj1jVLEKvERKArb-egb9j1Ls-fwTItT6vlI,6150
|
|
73
75
|
datachain/lib/data_model.py,sha256=ZwBXELtqROEdLL4DmxTipnwUZmhQvMz_UVDzyf7nQ9Y,2899
|
|
74
|
-
datachain/lib/dataset_info.py,sha256=
|
|
76
|
+
datachain/lib/dataset_info.py,sha256=uWq2YJgPqvORqIpPcUmDB4_itDVE5cybSn6jlJ_ktng,3331
|
|
75
77
|
datachain/lib/file.py,sha256=PuTa6CEG9CaJXPhxrZFY-R9-DS7ynB9l7Y0bUbd_Qwg,31952
|
|
76
78
|
datachain/lib/hf.py,sha256=gjxuStZBlKtNk3-4yYSlWZDv9zBGblOdvEy_Lwap5hA,5882
|
|
77
79
|
datachain/lib/image.py,sha256=erWvZW5M3emnbl6_fGAOPyKm-1EKbt3vOdWPfe3Oo7U,3265
|
|
78
|
-
datachain/lib/listing.py,sha256=
|
|
80
|
+
datachain/lib/listing.py,sha256=U-2stsTEwEsq4Y80dqGfktGzkmB5-ZntnL1_rzXlH0k,7089
|
|
79
81
|
datachain/lib/listing_info.py,sha256=9ua40Hw0aiQByUw3oAEeNzMavJYfW0Uhe8YdCTK-m_g,1110
|
|
80
82
|
datachain/lib/meta_formats.py,sha256=Epydbdch1g4CojK8wd_ePzmwmljC4fVWlJtZ16jsX-A,6349
|
|
81
83
|
datachain/lib/model_store.py,sha256=DNIv8Y6Jtk1_idNLzIpsThOsdW2BMAudyUCbPUcgcxk,2515
|
|
84
|
+
datachain/lib/namespaces.py,sha256=O2YtylfojZg0mjoaGLOwweyCN1CIQfl4r9dYaVAiJPI,2198
|
|
85
|
+
datachain/lib/projects.py,sha256=7DOWS-zvuLTqQsb4OYicWXBf3InHDHUqaqgDAdRUPiA,2734
|
|
82
86
|
datachain/lib/pytorch.py,sha256=elrmJ4YUDC2LZ9yXM1KwImVBOYIBJf6k0ZR7eSe6Aao,7712
|
|
83
|
-
datachain/lib/settings.py,sha256=
|
|
87
|
+
datachain/lib/settings.py,sha256=9wi0FoHxRxNiyn99pR28IYsMkoo47jQxeXuObQr2Ar0,2929
|
|
84
88
|
datachain/lib/signal_schema.py,sha256=Zhg8qThFDf9eoNWFH6KGeYB-sIGys7A_ybq2CUBG7Dg,36127
|
|
85
89
|
datachain/lib/tar.py,sha256=3WIzao6yD5fbLqXLTt9GhPGNonbFIs_fDRu-9vgLgsA,1038
|
|
86
90
|
datachain/lib/text.py,sha256=UNHm8fhidk7wdrWqacEWaA6I9ykfYqarQ2URby7jc7M,1261
|
|
@@ -99,15 +103,15 @@ datachain/lib/convert/values_to_tuples.py,sha256=j5yZMrVUH6W7b-7yUvdCTGI7JCUAYUO
|
|
|
99
103
|
datachain/lib/dc/__init__.py,sha256=HD0NYrdy44u6kkpvgGjJcvGz-UGTHui2azghcT8ZUg0,838
|
|
100
104
|
datachain/lib/dc/csv.py,sha256=q6a9BpapGwP6nwy6c5cklxQumep2fUp9l2LAjtTJr6s,4411
|
|
101
105
|
datachain/lib/dc/database.py,sha256=g5M6NjYR1T0vKte-abV-3Ejnm-HqxTIMir5cRi_SziE,6051
|
|
102
|
-
datachain/lib/dc/datachain.py,sha256=
|
|
103
|
-
datachain/lib/dc/datasets.py,sha256=
|
|
106
|
+
datachain/lib/dc/datachain.py,sha256=DNxcBnhlmxasKU7dnwsNn-p46S5EoP7CsxOxAmgswtQ,84242
|
|
107
|
+
datachain/lib/dc/datasets.py,sha256=07z81HerMMD4KOKJek541KESTB7XMwyna6wHVgxZ9Ek,13620
|
|
104
108
|
datachain/lib/dc/hf.py,sha256=PJl2wiLjdRsMz0SYbLT-6H8b-D5i2WjeH7li8HHOk_0,2145
|
|
105
109
|
datachain/lib/dc/json.py,sha256=dNijfJ-H92vU3soyR7X1IiDrWhm6yZIGG3bSnZkPdAE,2733
|
|
106
|
-
datachain/lib/dc/listings.py,sha256=
|
|
110
|
+
datachain/lib/dc/listings.py,sha256=eVBUP25W81dv46DLqkv8K0X7N3nxhoZm77gFrByeT_E,4660
|
|
107
111
|
datachain/lib/dc/pandas.py,sha256=ObueUXDUFKJGu380GmazdG02ARpKAHPhSaymfmOH13E,1489
|
|
108
112
|
datachain/lib/dc/parquet.py,sha256=zYcSgrWwyEDW9UxGUSVdIVsCu15IGEf0xL8KfWQqK94,1782
|
|
109
|
-
datachain/lib/dc/records.py,sha256=
|
|
110
|
-
datachain/lib/dc/storage.py,sha256=
|
|
113
|
+
datachain/lib/dc/records.py,sha256=AMtfWc7K6mtbW2OiaeIm3SjHTxDGnSgCEQW5u984Qh0,3111
|
|
114
|
+
datachain/lib/dc/storage.py,sha256=PX4wc5t-wmL_VD45rFaeJUJihME3jyGDXESZx04obZY,9203
|
|
111
115
|
datachain/lib/dc/utils.py,sha256=VawOAlJSvAtZbsMg33s5tJe21TRx1Km3QggI1nN6tnw,3984
|
|
112
116
|
datachain/lib/dc/values.py,sha256=7l1n352xWrEdql2NhBcZ3hj8xyPglWiY4qHjFPjn6iw,1428
|
|
113
117
|
datachain/model/__init__.py,sha256=R9faX5OHV1xh2EW-g2MPedwbtEqt3LodJRyluB-QylI,189
|
|
@@ -121,17 +125,17 @@ datachain/model/ultralytics/pose.py,sha256=pBlmt63Qe68FKmexHimUGlNbNOoOlMHXG4fzX
|
|
|
121
125
|
datachain/model/ultralytics/segment.py,sha256=63bDCj43E6iZ0hFI5J6uQfksdCmjEp6sEm1XzVaE8pw,2986
|
|
122
126
|
datachain/query/__init__.py,sha256=7DhEIjAA8uZJfejruAVMZVcGFmvUpffuZJwgRqNwe-c,263
|
|
123
127
|
datachain/query/batch.py,sha256=-goxLpE0EUvaDHu66rstj53UnfHpYfBUGux8GSpJ93k,4306
|
|
124
|
-
datachain/query/dataset.py,sha256=
|
|
128
|
+
datachain/query/dataset.py,sha256=SjFUh77rBTpgBZG4cfMJiJ2DhiCubGVk2cG1RYX4oyA,61571
|
|
125
129
|
datachain/query/dispatch.py,sha256=A0nPxn6mEN5d9dDo6S8m16Ji_9IvJLXrgF2kqXdi4fs,15546
|
|
126
130
|
datachain/query/metrics.py,sha256=DOK5HdNVaRugYPjl8qnBONvTkwjMloLqAr7Mi3TjCO0,858
|
|
127
131
|
datachain/query/params.py,sha256=O_j89mjYRLOwWNhYZl-z7mi-rkdP7WyFmaDufsdTryE,863
|
|
128
132
|
datachain/query/queue.py,sha256=v0UeK4ilmdiRoJ5OdjB5qpnHTYDxRP4vhVp5Iw_toaI,3512
|
|
129
133
|
datachain/query/schema.py,sha256=b_KnVy6B26Ol4nYG0LqNNpeQ1QYPk95YRGUjXfdaQWs,6606
|
|
130
|
-
datachain/query/session.py,sha256=
|
|
134
|
+
datachain/query/session.py,sha256=gKblltJAVQAVSTswAgWGDgGbpmFlFzFVkIQojDCjgXM,6809
|
|
131
135
|
datachain/query/udf.py,sha256=e753bDJzTNjGFQn1WGTvOAWSwjDbrFI1-_DDWkWN2ls,1343
|
|
132
136
|
datachain/query/utils.py,sha256=HaSDNH_XGvp_NIcXjcB7j4vJRPi4_tbztDWclYelHY4,1208
|
|
133
137
|
datachain/remote/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
134
|
-
datachain/remote/studio.py,sha256=
|
|
138
|
+
datachain/remote/studio.py,sha256=jA-I6q46GEuiGLp3PlQvCaU17ylHThC0HqS73oeBKUU,15114
|
|
135
139
|
datachain/sql/__init__.py,sha256=6SQRdbljO3d2hx3EAVXEZrHQKv5jth0Jh98PogT59No,262
|
|
136
140
|
datachain/sql/selectable.py,sha256=cTc60qVoAwqqss0Vop8Lt5Z-ROnM1XrQmL_GLjRxhXs,1765
|
|
137
141
|
datachain/sql/types.py,sha256=ASSPkmM5EzdRindqj2O7WHLXq8VHAgFYedG8lYfGvVI,14045
|
|
@@ -153,9 +157,9 @@ datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR
|
|
|
153
157
|
datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
|
|
154
158
|
datachain/toolkit/split.py,sha256=ktGWzY4kyzjWyR86dhvzw-Zhl0lVk_LOX3NciTac6qo,2914
|
|
155
159
|
datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
|
|
156
|
-
datachain-0.
|
|
157
|
-
datachain-0.
|
|
158
|
-
datachain-0.
|
|
159
|
-
datachain-0.
|
|
160
|
-
datachain-0.
|
|
161
|
-
datachain-0.
|
|
160
|
+
datachain-0.20.0.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
|
|
161
|
+
datachain-0.20.0.dist-info/METADATA,sha256=etcARLRHIUWT7LdqUv9Di5ZLOV3iq1HAOgiL9dsrd7c,13281
|
|
162
|
+
datachain-0.20.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
163
|
+
datachain-0.20.0.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
|
|
164
|
+
datachain-0.20.0.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
|
|
165
|
+
datachain-0.20.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|