datachain 0.17.0__py3-none-any.whl → 0.17.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- datachain/cli/parser/job.py +30 -0
- datachain/data_storage/warehouse.py +1 -1
- datachain/lib/dc/values.py +2 -0
- datachain/query/dispatch.py +8 -6
- datachain/remote/studio.py +12 -1
- datachain/studio.py +33 -2
- {datachain-0.17.0.dist-info → datachain-0.17.2.dist-info}/METADATA +2 -2
- {datachain-0.17.0.dist-info → datachain-0.17.2.dist-info}/RECORD +12 -12
- {datachain-0.17.0.dist-info → datachain-0.17.2.dist-info}/WHEEL +1 -1
- {datachain-0.17.0.dist-info → datachain-0.17.2.dist-info}/entry_points.txt +0 -0
- {datachain-0.17.0.dist-info → datachain-0.17.2.dist-info}/licenses/LICENSE +0 -0
- {datachain-0.17.0.dist-info → datachain-0.17.2.dist-info}/top_level.txt +0 -0
datachain/cli/parser/job.py
CHANGED
|
@@ -83,6 +83,36 @@ def add_jobs_parser(subparsers, parent_parser) -> None:
|
|
|
83
83
|
help="Python package requirements",
|
|
84
84
|
)
|
|
85
85
|
|
|
86
|
+
studio_ls_help = "List jobs in Studio"
|
|
87
|
+
studio_ls_description = "List jobs in Studio."
|
|
88
|
+
|
|
89
|
+
studio_ls_parser = jobs_subparser.add_parser(
|
|
90
|
+
"ls",
|
|
91
|
+
parents=[parent_parser],
|
|
92
|
+
description=studio_ls_description,
|
|
93
|
+
help=studio_ls_help,
|
|
94
|
+
formatter_class=CustomHelpFormatter,
|
|
95
|
+
)
|
|
96
|
+
|
|
97
|
+
studio_ls_parser.add_argument(
|
|
98
|
+
"--status",
|
|
99
|
+
action="store",
|
|
100
|
+
help="Status to filter jobs by",
|
|
101
|
+
)
|
|
102
|
+
|
|
103
|
+
studio_ls_parser.add_argument(
|
|
104
|
+
"--team",
|
|
105
|
+
action="store",
|
|
106
|
+
default=None,
|
|
107
|
+
help="Team to list jobs for (default: from config)",
|
|
108
|
+
)
|
|
109
|
+
studio_ls_parser.add_argument(
|
|
110
|
+
"--limit",
|
|
111
|
+
type=int,
|
|
112
|
+
default=20,
|
|
113
|
+
help="Limit the number of jobs returned (default: 20)",
|
|
114
|
+
)
|
|
115
|
+
|
|
86
116
|
studio_cancel_help = "Cancel a job in Studio"
|
|
87
117
|
studio_cancel_description = "Cancel a running job in Studio."
|
|
88
118
|
|
|
datachain/data_storage/warehouse.py
CHANGED
|
@@ -258,7 +258,7 @@ class AbstractWarehouse(ABC, Serializable):
|
|
|
258
258
|
if Client.is_data_source_uri(dataset_name):
|
|
259
259
|
# for datasets that are created for bucket listing we use different prefix
|
|
260
260
|
prefix = self.DATASET_SOURCE_TABLE_PREFIX
|
|
261
|
-
return f"{prefix}{dataset_name}_{version}"
|
|
261
|
+
return f"{prefix}{dataset_name}_{version.replace('.', '_')}"
|
|
262
262
|
|
|
263
263
|
def temp_table_name(self) -> str:
|
|
264
264
|
return self.TMP_TABLE_NAME_PREFIX + _random_string(6)
|
datachain/lib/dc/values.py
CHANGED
datachain/query/dispatch.py
CHANGED
|
@@ -203,9 +203,10 @@ class UDFDispatcher:
|
|
|
203
203
|
def get_inputs() -> Iterable["RowsOutput"]:
|
|
204
204
|
warehouse = self.catalog.warehouse.clone()
|
|
205
205
|
if ids_only:
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
206
|
+
for ids in batched(input_rows, DEFAULT_BATCH_SIZE):
|
|
207
|
+
yield from warehouse.dataset_rows_select_from_ids(
|
|
208
|
+
self.query, ids, self.is_batching
|
|
209
|
+
)
|
|
209
210
|
else:
|
|
210
211
|
yield from input_rows
|
|
211
212
|
|
|
@@ -425,8 +426,9 @@ class UDFWorker:
|
|
|
425
426
|
warehouse = self.catalog.warehouse.clone()
|
|
426
427
|
while (batch := get_from_queue(self.task_queue)) != STOP_SIGNAL:
|
|
427
428
|
if ids_only:
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
429
|
+
for ids in batched(batch, DEFAULT_BATCH_SIZE):
|
|
430
|
+
yield from warehouse.dataset_rows_select_from_ids(
|
|
431
|
+
self.query, ids, self.is_batching
|
|
432
|
+
)
|
|
431
433
|
else:
|
|
432
434
|
yield from batch
|
datachain/remote/studio.py
CHANGED
|
@@ -29,7 +29,7 @@ DatasetExportStatus = Optional[dict[str, Any]]
|
|
|
29
29
|
DatasetExportSignedUrls = Optional[list[str]]
|
|
30
30
|
FileUploadData = Optional[dict[str, Any]]
|
|
31
31
|
JobData = Optional[dict[str, Any]]
|
|
32
|
-
|
|
32
|
+
JobListData = dict[str, Any]
|
|
33
33
|
logger = logging.getLogger("datachain")
|
|
34
34
|
|
|
35
35
|
DATASET_ROWS_CHUNK_SIZE = 8192
|
|
@@ -402,6 +402,17 @@ class StudioClient:
|
|
|
402
402
|
}
|
|
403
403
|
return self._send_request("datachain/job", data)
|
|
404
404
|
|
|
405
|
+
def get_jobs(
|
|
406
|
+
self,
|
|
407
|
+
status: Optional[str] = None,
|
|
408
|
+
limit: int = 20,
|
|
409
|
+
) -> Response[JobListData]:
|
|
410
|
+
return self._send_request(
|
|
411
|
+
"datachain/jobs",
|
|
412
|
+
{"status": status, "limit": limit} if status else {"limit": limit},
|
|
413
|
+
method="GET",
|
|
414
|
+
)
|
|
415
|
+
|
|
405
416
|
def cancel_job(
|
|
406
417
|
self,
|
|
407
418
|
job_id: str,
|
datachain/studio.py
CHANGED
|
@@ -3,6 +3,8 @@ import os
|
|
|
3
3
|
import sys
|
|
4
4
|
from typing import TYPE_CHECKING, Optional
|
|
5
5
|
|
|
6
|
+
import tabulate
|
|
7
|
+
|
|
6
8
|
from datachain.config import Config, ConfigLevel
|
|
7
9
|
from datachain.dataset import QUERY_DATASET_PREFIX
|
|
8
10
|
from datachain.error import DataChainError
|
|
@@ -44,6 +46,10 @@ def process_jobs_args(args: "Namespace"):
|
|
|
44
46
|
return cancel_job(args.id, args.team)
|
|
45
47
|
if args.cmd == "logs":
|
|
46
48
|
return show_job_logs(args.id, args.team)
|
|
49
|
+
|
|
50
|
+
if args.cmd == "ls":
|
|
51
|
+
return list_jobs(args.status, args.team, args.limit)
|
|
52
|
+
|
|
47
53
|
raise DataChainError(f"Unknown command '{args.cmd}'.")
|
|
48
54
|
|
|
49
55
|
|
|
@@ -240,13 +246,13 @@ def show_logs_from_client(client, job_id):
|
|
|
240
246
|
raise DataChainError(response.message)
|
|
241
247
|
|
|
242
248
|
response_data = response.data
|
|
243
|
-
if response_data:
|
|
249
|
+
if response_data and response_data.get("dataset_versions"):
|
|
244
250
|
dataset_versions = response_data.get("dataset_versions", [])
|
|
245
251
|
print("\n\n>>>> Dataset versions created during the job:")
|
|
246
252
|
for version in dataset_versions:
|
|
247
253
|
print(f" - {version.get('dataset_name')}@v{version.get('version')}")
|
|
248
254
|
else:
|
|
249
|
-
print("
|
|
255
|
+
print("\n\nNo dataset versions created during the job.")
|
|
250
256
|
|
|
251
257
|
|
|
252
258
|
def create_job(
|
|
@@ -337,6 +343,31 @@ def cancel_job(job_id: str, team_name: Optional[str]):
|
|
|
337
343
|
print(f"Job {job_id} canceled")
|
|
338
344
|
|
|
339
345
|
|
|
346
|
+
def list_jobs(status: Optional[str], team_name: Optional[str], limit: int):
|
|
347
|
+
client = StudioClient(team=team_name)
|
|
348
|
+
response = client.get_jobs(status, limit)
|
|
349
|
+
if not response.ok:
|
|
350
|
+
raise DataChainError(response.message)
|
|
351
|
+
|
|
352
|
+
jobs = response.data.get("jobs", [])
|
|
353
|
+
if not jobs:
|
|
354
|
+
print("No jobs found")
|
|
355
|
+
return
|
|
356
|
+
|
|
357
|
+
rows = [
|
|
358
|
+
{
|
|
359
|
+
"ID": job.get("id"),
|
|
360
|
+
"Name": job.get("name"),
|
|
361
|
+
"Status": job.get("status"),
|
|
362
|
+
"Created at": job.get("created_at"),
|
|
363
|
+
"Created by": job.get("created_by"),
|
|
364
|
+
}
|
|
365
|
+
for job in jobs
|
|
366
|
+
]
|
|
367
|
+
|
|
368
|
+
print(tabulate.tabulate(rows, headers="keys", tablefmt="grid"))
|
|
369
|
+
|
|
370
|
+
|
|
340
371
|
def show_job_logs(job_id: str, team_name: Optional[str]):
|
|
341
372
|
token = Config().read().get("studio", {}).get("token")
|
|
342
373
|
if not token:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: datachain
|
|
3
|
-
Version: 0.17.0
|
|
3
|
+
Version: 0.17.2
|
|
4
4
|
Summary: Wrangle unstructured AI data at scale
|
|
5
5
|
Author-email: Dmitry Petrov <support@dvc.org>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -44,7 +44,7 @@ Requires-Dist: datamodel-code-generator>=0.25
|
|
|
44
44
|
Requires-Dist: Pillow<12,>=10.0.0
|
|
45
45
|
Requires-Dist: msgpack<2,>=1.0.4
|
|
46
46
|
Requires-Dist: psutil
|
|
47
|
-
Requires-Dist: huggingface_hub
|
|
47
|
+
Requires-Dist: huggingface_hub<0.31
|
|
48
48
|
Requires-Dist: iterative-telemetry>=0.0.10
|
|
49
49
|
Requires-Dist: platformdirs
|
|
50
50
|
Requires-Dist: dvc-studio-client<1,>=0.21
|
|
@@ -14,7 +14,7 @@ datachain/progress.py,sha256=lRzxoYP4Qv2XBwD78sOkmYRzHFpZ2ExVNJF8wAeICtY,770
|
|
|
14
14
|
datachain/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
15
15
|
datachain/script_meta.py,sha256=V-LaFOZG84pD0Zc0NvejYdzwDgzITv6yHvAHggDCnuY,4978
|
|
16
16
|
datachain/semver.py,sha256=t_3Y5OGLEthrstBwuwrf5pXVquEuRFu3ZoGe3ajfJB8,1715
|
|
17
|
-
datachain/studio.py,sha256=
|
|
17
|
+
datachain/studio.py,sha256=5MSDk-pM2Na0TlK0eCStmZ47hNBvlY2WPKzCQFmBPpM,10948
|
|
18
18
|
datachain/telemetry.py,sha256=0A4IOPPp9VlP5pyW9eBfaTK3YhHGzHl7dQudQjUAx9A,994
|
|
19
19
|
datachain/utils.py,sha256=DNqOi-Ydb7InyWvD9m7_yailxz6-YGpZzh00biQaHNo,15305
|
|
20
20
|
datachain/catalog/__init__.py,sha256=cMZzSz3VoUi-6qXSVaHYN-agxQuAcz2XSqnEPZ55crE,353
|
|
@@ -32,7 +32,7 @@ datachain/cli/commands/misc.py,sha256=c0DmkOLwcDI2YhA8ArOuLJk6aGzSMZCiKL_E2JGibV
|
|
|
32
32
|
datachain/cli/commands/query.py,sha256=Xzfgh14nPVH-sclqX1tpZqgfdTugw5s_44v0D33z6FA,1505
|
|
33
33
|
datachain/cli/commands/show.py,sha256=Cf8wBs12h-xtdOzjU5GTDy2C8rF5HJSF0hDJYER1zH8,1606
|
|
34
34
|
datachain/cli/parser/__init__.py,sha256=sjCIcosAtZqa0m50GMQHqmCkZSYxKyZNwQ29XwRQlP0,15913
|
|
35
|
-
datachain/cli/parser/job.py,sha256=
|
|
35
|
+
datachain/cli/parser/job.py,sha256=dpRY6KQgooCXhQGQiebPuNuGouKk5jclkpafpQ2TDvQ,4373
|
|
36
36
|
datachain/cli/parser/studio.py,sha256=Y-1OlQGecLVi9QofvWUfSlPd2ISyaESf7QFGZqGsrdw,3609
|
|
37
37
|
datachain/cli/parser/utils.py,sha256=rETdD-9Hq9A4OolgfT7jQw4aoawtbfmkdtH6E7nkhpI,2888
|
|
38
38
|
datachain/client/__init__.py,sha256=1kDpCPoibMXi1gExR4lTLc5pi-k6M5TANiwtXkPoLhU,49
|
|
@@ -50,7 +50,7 @@ datachain/data_storage/metastore.py,sha256=vo2ab-U_-BKfeFYTmvpbCoMyMZEVxrVqM9Djj
|
|
|
50
50
|
datachain/data_storage/schema.py,sha256=asZYz1cg_WKfe2Q-k5W51E2z2CzHU5B4QEDZDMFr8yo,9346
|
|
51
51
|
datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2krunWUum5o,927
|
|
52
52
|
datachain/data_storage/sqlite.py,sha256=bwZAB_NUMT2WMv5tPQnnLFA0P-PiQtxzSaQ1q6xDxOU,24590
|
|
53
|
-
datachain/data_storage/warehouse.py,sha256=
|
|
53
|
+
datachain/data_storage/warehouse.py,sha256=RkdX1cunfmpDkRYRdOGNy0kLw7RekIokVl3Dd0i-hrA,31534
|
|
54
54
|
datachain/diff/__init__.py,sha256=YkGdiDbZIMhAZ2SJ4eSe00HU67VP1P6SL2L_t0ODYMs,9425
|
|
55
55
|
datachain/fs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
56
56
|
datachain/fs/reference.py,sha256=A8McpXF0CqbXPqanXuvpKu50YLB3a2ZXA3YAPxtBXSM,914
|
|
@@ -108,7 +108,7 @@ datachain/lib/dc/parquet.py,sha256=zYcSgrWwyEDW9UxGUSVdIVsCu15IGEf0xL8KfWQqK94,1
|
|
|
108
108
|
datachain/lib/dc/records.py,sha256=J1I69J2gFIBjRTGr2LG-5qn_rTVzRLcr2y3tVDrmHdg,3068
|
|
109
109
|
datachain/lib/dc/storage.py,sha256=Z0rqBNoss8RVHo8I3Bpc5hJBLTJfmJHRbV0SA_tR5LI,5635
|
|
110
110
|
datachain/lib/dc/utils.py,sha256=VawOAlJSvAtZbsMg33s5tJe21TRx1Km3QggI1nN6tnw,3984
|
|
111
|
-
datachain/lib/dc/values.py,sha256=
|
|
111
|
+
datachain/lib/dc/values.py,sha256=7l1n352xWrEdql2NhBcZ3hj8xyPglWiY4qHjFPjn6iw,1428
|
|
112
112
|
datachain/model/__init__.py,sha256=R9faX5OHV1xh2EW-g2MPedwbtEqt3LodJRyluB-QylI,189
|
|
113
113
|
datachain/model/bbox.py,sha256=cQNHuQuVsh6bW3n3Hj40F2Cc20cExQ9Lg_q7R2jxUMI,9324
|
|
114
114
|
datachain/model/pose.py,sha256=rjquA6M-I-Y30Xm6YSkGv1OY52hJZmR2AuxbIpE5uD0,3865
|
|
@@ -121,7 +121,7 @@ datachain/model/ultralytics/segment.py,sha256=koq1HASo29isf0in6oSlzmU4IzsmOXe87F
|
|
|
121
121
|
datachain/query/__init__.py,sha256=7DhEIjAA8uZJfejruAVMZVcGFmvUpffuZJwgRqNwe-c,263
|
|
122
122
|
datachain/query/batch.py,sha256=-goxLpE0EUvaDHu66rstj53UnfHpYfBUGux8GSpJ93k,4306
|
|
123
123
|
datachain/query/dataset.py,sha256=v9xVYyft06KNvsPrA-j59krZXwRAunKvyYyJsINZpxM,59114
|
|
124
|
-
datachain/query/dispatch.py,sha256=
|
|
124
|
+
datachain/query/dispatch.py,sha256=15M3zlTUFKM6D2ijITX4o5QxCkRe2klkODsIDi3aQOg,15544
|
|
125
125
|
datachain/query/metrics.py,sha256=DOK5HdNVaRugYPjl8qnBONvTkwjMloLqAr7Mi3TjCO0,858
|
|
126
126
|
datachain/query/params.py,sha256=O_j89mjYRLOwWNhYZl-z7mi-rkdP7WyFmaDufsdTryE,863
|
|
127
127
|
datachain/query/queue.py,sha256=v0UeK4ilmdiRoJ5OdjB5qpnHTYDxRP4vhVp5Iw_toaI,3512
|
|
@@ -130,7 +130,7 @@ datachain/query/session.py,sha256=3nyOvPmLiA86IdHc3BL6Dt_himtHVvaDz_I1h3hZ_gI,65
|
|
|
130
130
|
datachain/query/udf.py,sha256=e753bDJzTNjGFQn1WGTvOAWSwjDbrFI1-_DDWkWN2ls,1343
|
|
131
131
|
datachain/query/utils.py,sha256=HaSDNH_XGvp_NIcXjcB7j4vJRPi4_tbztDWclYelHY4,1208
|
|
132
132
|
datachain/remote/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
133
|
-
datachain/remote/studio.py,sha256=
|
|
133
|
+
datachain/remote/studio.py,sha256=4oypHea2NYB7br_BJTYPAmh3FH8KV835mq01Pwexrrg,13524
|
|
134
134
|
datachain/sql/__init__.py,sha256=6SQRdbljO3d2hx3EAVXEZrHQKv5jth0Jh98PogT59No,262
|
|
135
135
|
datachain/sql/selectable.py,sha256=cTc60qVoAwqqss0Vop8Lt5Z-ROnM1XrQmL_GLjRxhXs,1765
|
|
136
136
|
datachain/sql/types.py,sha256=ASSPkmM5EzdRindqj2O7WHLXq8VHAgFYedG8lYfGvVI,14045
|
|
@@ -152,9 +152,9 @@ datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR
|
|
|
152
152
|
datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
|
|
153
153
|
datachain/toolkit/split.py,sha256=ktGWzY4kyzjWyR86dhvzw-Zhl0lVk_LOX3NciTac6qo,2914
|
|
154
154
|
datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
|
|
155
|
-
datachain-0.17.0.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
|
|
156
|
-
datachain-0.17.
|
|
157
|
-
datachain-0.17.
|
|
158
|
-
datachain-0.17.
|
|
159
|
-
datachain-0.17.
|
|
160
|
-
datachain-0.17.0.dist-info/RECORD,,
|
|
155
|
+
datachain-0.17.2.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
|
|
156
|
+
datachain-0.17.2.dist-info/METADATA,sha256=JEQUpUwLjYtMDRL9nRObwbo3bnYo2MNgQVomGf-JBpI,11336
|
|
157
|
+
datachain-0.17.2.dist-info/WHEEL,sha256=DnLRTWE75wApRYVsjgc6wsVswC54sMSJhAEd4xhDpBk,91
|
|
158
|
+
datachain-0.17.2.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
|
|
159
|
+
datachain-0.17.2.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
|
|
160
|
+
datachain-0.17.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|