datachain 0.27.0__py3-none-any.whl → 0.28.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- datachain/cli/parser/job.py +5 -0
- datachain/data_storage/job.py +2 -1
- datachain/studio.py +33 -11
- {datachain-0.27.0.dist-info → datachain-0.28.0.dist-info}/METADATA +2 -2
- {datachain-0.27.0.dist-info → datachain-0.28.0.dist-info}/RECORD +9 -9
- {datachain-0.27.0.dist-info → datachain-0.28.0.dist-info}/WHEEL +0 -0
- {datachain-0.27.0.dist-info → datachain-0.28.0.dist-info}/entry_points.txt +0 -0
- {datachain-0.27.0.dist-info → datachain-0.28.0.dist-info}/licenses/LICENSE +0 -0
- {datachain-0.27.0.dist-info → datachain-0.28.0.dist-info}/top_level.txt +0 -0
datachain/cli/parser/job.py
CHANGED
|
@@ -109,6 +109,11 @@ def add_jobs_parser(subparsers, parent_parser) -> None:
|
|
|
109
109
|
studio_run_parser.add_argument(
|
|
110
110
|
"--cron", action="store", help="Cron expression for the cron task."
|
|
111
111
|
)
|
|
112
|
+
studio_run_parser.add_argument(
|
|
113
|
+
"--no-wait",
|
|
114
|
+
action="store_true",
|
|
115
|
+
help="Do not wait for the job to finish",
|
|
116
|
+
)
|
|
112
117
|
|
|
113
118
|
studio_ls_help = "List jobs in Studio"
|
|
114
119
|
studio_ls_description = "List jobs in Studio."
|
datachain/data_storage/job.py
CHANGED
|
@@ -12,10 +12,11 @@ class JobStatus(int, Enum):
|
|
|
12
12
|
CANCELING = 7
|
|
13
13
|
CANCELED = 8
|
|
14
14
|
CANCELING_SCHEDULED = 9
|
|
15
|
+
TASK = 11
|
|
15
16
|
|
|
16
17
|
@classmethod
|
|
17
18
|
def finished(cls) -> tuple[int, ...]:
|
|
18
|
-
return cls.COMPLETE, cls.FAILED, cls.CANCELED
|
|
19
|
+
return cls.COMPLETE, cls.FAILED, cls.CANCELED, cls.TASK
|
|
19
20
|
|
|
20
21
|
|
|
21
22
|
class JobQueryType(int, Enum):
|
datachain/studio.py
CHANGED
|
@@ -8,6 +8,7 @@ import dateparser
|
|
|
8
8
|
import tabulate
|
|
9
9
|
|
|
10
10
|
from datachain.config import Config, ConfigLevel
|
|
11
|
+
from datachain.data_storage.job import JobStatus
|
|
11
12
|
from datachain.dataset import QUERY_DATASET_PREFIX, parse_dataset_name
|
|
12
13
|
from datachain.error import DataChainError
|
|
13
14
|
from datachain.remote.studio import StudioClient
|
|
@@ -20,6 +21,8 @@ POST_LOGIN_MESSAGE = (
|
|
|
20
21
|
"Once you've logged in, return here "
|
|
21
22
|
"and you'll be ready to start using DataChain with Studio."
|
|
22
23
|
)
|
|
24
|
+
RETRY_MAX_TIMES = 10
|
|
25
|
+
RETRY_SLEEP_SEC = 1
|
|
23
26
|
|
|
24
27
|
|
|
25
28
|
def process_jobs_args(args: "Namespace"):
|
|
@@ -46,6 +49,7 @@ def process_jobs_args(args: "Namespace"):
|
|
|
46
49
|
args.cluster,
|
|
47
50
|
args.start_time,
|
|
48
51
|
args.cron,
|
|
52
|
+
args.no_wait,
|
|
49
53
|
)
|
|
50
54
|
|
|
51
55
|
if args.cmd == "cancel":
|
|
@@ -287,17 +291,34 @@ def parse_start_time(start_time_str: Optional[str]) -> Optional[str]:
|
|
|
287
291
|
def show_logs_from_client(client, job_id):
|
|
288
292
|
# Sync usage
|
|
289
293
|
async def _run():
|
|
294
|
+
retry_count = 0
|
|
290
295
|
latest_status = None
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
296
|
+
processed_statuses = set()
|
|
297
|
+
while True:
|
|
298
|
+
async for message in client.tail_job_logs(job_id):
|
|
299
|
+
if "logs" in message:
|
|
300
|
+
for log in message["logs"]:
|
|
301
|
+
print(log["message"], end="")
|
|
302
|
+
elif "job" in message:
|
|
303
|
+
latest_status = message["job"]["status"]
|
|
304
|
+
if latest_status in processed_statuses:
|
|
305
|
+
continue
|
|
306
|
+
processed_statuses.add(latest_status)
|
|
307
|
+
print(f"\n>>>> Job is now in {latest_status} status.")
|
|
308
|
+
|
|
309
|
+
try:
|
|
310
|
+
if retry_count > RETRY_MAX_TIMES or (
|
|
311
|
+
latest_status and JobStatus[latest_status].finished()
|
|
312
|
+
):
|
|
313
|
+
break
|
|
314
|
+
await asyncio.sleep(RETRY_SLEEP_SEC)
|
|
315
|
+
retry_count += 1
|
|
316
|
+
except KeyError:
|
|
317
|
+
pass
|
|
318
|
+
|
|
298
319
|
return latest_status
|
|
299
320
|
|
|
300
|
-
|
|
321
|
+
final_status = asyncio.run(_run())
|
|
301
322
|
|
|
302
323
|
response = client.dataset_job_versions(job_id)
|
|
303
324
|
if not response.ok:
|
|
@@ -314,9 +335,9 @@ def show_logs_from_client(client, job_id):
|
|
|
314
335
|
|
|
315
336
|
exit_code_by_status = {
|
|
316
337
|
"FAILED": 1,
|
|
317
|
-
"
|
|
338
|
+
"CANCELED": 2,
|
|
318
339
|
}
|
|
319
|
-
return exit_code_by_status.get(
|
|
340
|
+
return exit_code_by_status.get(final_status.upper(), 0) if final_status else 0
|
|
320
341
|
|
|
321
342
|
|
|
322
343
|
def create_job(
|
|
@@ -334,6 +355,7 @@ def create_job(
|
|
|
334
355
|
cluster: Optional[str] = None,
|
|
335
356
|
start_time: Optional[str] = None,
|
|
336
357
|
cron: Optional[str] = None,
|
|
358
|
+
no_wait: Optional[bool] = False,
|
|
337
359
|
):
|
|
338
360
|
query_type = "PYTHON" if query_file.endswith(".py") else "SHELL"
|
|
339
361
|
with open(query_file) as f:
|
|
@@ -388,7 +410,7 @@ def create_job(
|
|
|
388
410
|
print("Open the job in Studio at", response.data.get("job", {}).get("url"))
|
|
389
411
|
print("=" * 40)
|
|
390
412
|
|
|
391
|
-
return show_logs_from_client(client, job_id)
|
|
413
|
+
return 0 if no_wait else show_logs_from_client(client, job_id)
|
|
392
414
|
|
|
393
415
|
|
|
394
416
|
def upload_files(client: StudioClient, files: list[str]) -> list[str]:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: datachain
|
|
3
|
-
Version: 0.27.0
|
|
3
|
+
Version: 0.28.0
|
|
4
4
|
Summary: Wrangle unstructured AI data at scale
|
|
5
5
|
Author-email: Dmitry Petrov <support@dvc.org>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -45,7 +45,7 @@ Requires-Dist: datamodel-code-generator>=0.25
|
|
|
45
45
|
Requires-Dist: Pillow<12,>=10.0.0
|
|
46
46
|
Requires-Dist: msgpack<2,>=1.0.4
|
|
47
47
|
Requires-Dist: psutil
|
|
48
|
-
Requires-Dist: huggingface_hub
|
|
48
|
+
Requires-Dist: huggingface_hub<0.34.0
|
|
49
49
|
Requires-Dist: iterative-telemetry>=0.0.10
|
|
50
50
|
Requires-Dist: platformdirs
|
|
51
51
|
Requires-Dist: dvc-studio-client<1,>=0.21
|
|
@@ -17,7 +17,7 @@ datachain/project.py,sha256=90D4GpJSA3t0fayYZbzrL3sk4U7EJhQo8psnWvdI7_o,2280
|
|
|
17
17
|
datachain/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
18
18
|
datachain/script_meta.py,sha256=V-LaFOZG84pD0Zc0NvejYdzwDgzITv6yHvAHggDCnuY,4978
|
|
19
19
|
datachain/semver.py,sha256=UB8GHPBtAP3UJGeiuJoInD7SK-DnB93_Xd1qy_CQ9cU,2074
|
|
20
|
-
datachain/studio.py,sha256
|
|
20
|
+
datachain/studio.py,sha256=-BmKLVNBLPFveUgVVE2So3aaiGndO2jK2qbHZ0zBDd8,15239
|
|
21
21
|
datachain/telemetry.py,sha256=0A4IOPPp9VlP5pyW9eBfaTK3YhHGzHl7dQudQjUAx9A,994
|
|
22
22
|
datachain/utils.py,sha256=DNqOi-Ydb7InyWvD9m7_yailxz6-YGpZzh00biQaHNo,15305
|
|
23
23
|
datachain/catalog/__init__.py,sha256=cMZzSz3VoUi-6qXSVaHYN-agxQuAcz2XSqnEPZ55crE,353
|
|
@@ -35,7 +35,7 @@ datachain/cli/commands/misc.py,sha256=c0DmkOLwcDI2YhA8ArOuLJk6aGzSMZCiKL_E2JGibV
|
|
|
35
35
|
datachain/cli/commands/query.py,sha256=Xzfgh14nPVH-sclqX1tpZqgfdTugw5s_44v0D33z6FA,1505
|
|
36
36
|
datachain/cli/commands/show.py,sha256=Cf8wBs12h-xtdOzjU5GTDy2C8rF5HJSF0hDJYER1zH8,1606
|
|
37
37
|
datachain/cli/parser/__init__.py,sha256=NPB6ssP4CCt7G1SWZ_8oNQEH2C1lktWgkyHYXDQJZNc,15073
|
|
38
|
-
datachain/cli/parser/job.py,sha256=
|
|
38
|
+
datachain/cli/parser/job.py,sha256=g6ozI3pnV0ly79L7M9mikCeYTPgKlG5gR0D144R82tk,5928
|
|
39
39
|
datachain/cli/parser/studio.py,sha256=Bo__LKM7qhJGgkyX8M_bCvgZ2Gvqq6r_X4t1NdtaBIY,3881
|
|
40
40
|
datachain/cli/parser/utils.py,sha256=rETdD-9Hq9A4OolgfT7jQw4aoawtbfmkdtH6E7nkhpI,2888
|
|
41
41
|
datachain/client/__init__.py,sha256=1kDpCPoibMXi1gExR4lTLc5pi-k6M5TANiwtXkPoLhU,49
|
|
@@ -48,7 +48,7 @@ datachain/client/local.py,sha256=0J52Wzvw25hSucVlzBvLuMRAZwrAHZAYDvD1mNBqf4c,460
|
|
|
48
48
|
datachain/client/s3.py,sha256=6DNVGLg-woPS1DVlYVX2rIlunNblsuxyOnI1rSzhW3k,7515
|
|
49
49
|
datachain/data_storage/__init__.py,sha256=9Wit-oe5P46V7CJQTD0BJ5MhOa2Y9h3ddJ4VWTe-Lec,273
|
|
50
50
|
datachain/data_storage/db_engine.py,sha256=n8ojCbvVMPY2e3SG8fUaaD0b9GkVfpl_Naa_6EiHfWg,3788
|
|
51
|
-
datachain/data_storage/job.py,sha256=
|
|
51
|
+
datachain/data_storage/job.py,sha256=ZkeXCNUj_VCkoKYx29hqB4AcfVUielnRjY-GYUcUxt4,426
|
|
52
52
|
datachain/data_storage/metastore.py,sha256=Qw332arvhgXB4UY0yX-Hu8Vgl3smU12l6bvxrL9Q-vo,53810
|
|
53
53
|
datachain/data_storage/schema.py,sha256=o3JbURKXRg3IJyIVA4QjHHkn6byRuz7avbydU2FlvNY,9897
|
|
54
54
|
datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2krunWUum5o,927
|
|
@@ -158,9 +158,9 @@ datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR
|
|
|
158
158
|
datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
|
|
159
159
|
datachain/toolkit/split.py,sha256=ktGWzY4kyzjWyR86dhvzw-Zhl0lVk_LOX3NciTac6qo,2914
|
|
160
160
|
datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
|
|
161
|
-
datachain-0.27.0.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
|
|
162
|
-
datachain-0.
|
|
163
|
-
datachain-0.27.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
164
|
-
datachain-0.27.0.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
|
|
165
|
-
datachain-0.27.0.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
|
|
166
|
-
datachain-0.27.0.dist-info/RECORD,,
|
|
161
|
+
datachain-0.28.0.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
|
|
162
|
+
datachain-0.28.0.dist-info/METADATA,sha256=lA3lv9RX2NeQPobrEjoEbAwg5K3zmnAnbDJ_hjR8KLw,13766
|
|
163
|
+
datachain-0.28.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
164
|
+
datachain-0.28.0.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
|
|
165
|
+
datachain-0.28.0.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
|
|
166
|
+
datachain-0.28.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|