datachain-0.27.0-py3-none-any.whl → datachain-0.28.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. Click here for more details.

@@ -109,6 +109,11 @@ def add_jobs_parser(subparsers, parent_parser) -> None:
109
109
  studio_run_parser.add_argument(
110
110
  "--cron", action="store", help="Cron expression for the cron task."
111
111
  )
112
+ studio_run_parser.add_argument(
113
+ "--no-wait",
114
+ action="store_true",
115
+ help="Do not wait for the job to finish",
116
+ )
112
117
 
113
118
  studio_ls_help = "List jobs in Studio"
114
119
  studio_ls_description = "List jobs in Studio."
@@ -12,10 +12,11 @@ class JobStatus(int, Enum):
12
12
  CANCELING = 7
13
13
  CANCELED = 8
14
14
  CANCELING_SCHEDULED = 9
15
+ TASK = 11
15
16
 
16
17
  @classmethod
17
18
  def finished(cls) -> tuple[int, ...]:
18
- return cls.COMPLETE, cls.FAILED, cls.CANCELED
19
+ return cls.COMPLETE, cls.FAILED, cls.CANCELED, cls.TASK
19
20
 
20
21
 
21
22
  class JobQueryType(int, Enum):
datachain/studio.py CHANGED
@@ -8,6 +8,7 @@ import dateparser
8
8
  import tabulate
9
9
 
10
10
  from datachain.config import Config, ConfigLevel
11
+ from datachain.data_storage.job import JobStatus
11
12
  from datachain.dataset import QUERY_DATASET_PREFIX, parse_dataset_name
12
13
  from datachain.error import DataChainError
13
14
  from datachain.remote.studio import StudioClient
@@ -20,6 +21,8 @@ POST_LOGIN_MESSAGE = (
20
21
  "Once you've logged in, return here "
21
22
  "and you'll be ready to start using DataChain with Studio."
22
23
  )
24
+ RETRY_MAX_TIMES = 10
25
+ RETRY_SLEEP_SEC = 1
23
26
 
24
27
 
25
28
  def process_jobs_args(args: "Namespace"):
@@ -46,6 +49,7 @@ def process_jobs_args(args: "Namespace"):
46
49
  args.cluster,
47
50
  args.start_time,
48
51
  args.cron,
52
+ args.no_wait,
49
53
  )
50
54
 
51
55
  if args.cmd == "cancel":
@@ -287,17 +291,34 @@ def parse_start_time(start_time_str: Optional[str]) -> Optional[str]:
287
291
  def show_logs_from_client(client, job_id):
288
292
  # Sync usage
289
293
  async def _run():
294
+ retry_count = 0
290
295
  latest_status = None
291
- async for message in client.tail_job_logs(job_id):
292
- if "logs" in message:
293
- for log in message["logs"]:
294
- print(log["message"], end="")
295
- elif "job" in message:
296
- latest_status = message["job"]["status"]
297
- print(f"\n>>>> Job is now in {latest_status} status.")
296
+ processed_statuses = set()
297
+ while True:
298
+ async for message in client.tail_job_logs(job_id):
299
+ if "logs" in message:
300
+ for log in message["logs"]:
301
+ print(log["message"], end="")
302
+ elif "job" in message:
303
+ latest_status = message["job"]["status"]
304
+ if latest_status in processed_statuses:
305
+ continue
306
+ processed_statuses.add(latest_status)
307
+ print(f"\n>>>> Job is now in {latest_status} status.")
308
+
309
+ try:
310
+ if retry_count > RETRY_MAX_TIMES or (
311
+ latest_status and JobStatus[latest_status].finished()
312
+ ):
313
+ break
314
+ await asyncio.sleep(RETRY_SLEEP_SEC)
315
+ retry_count += 1
316
+ except KeyError:
317
+ pass
318
+
298
319
  return latest_status
299
320
 
300
- latest_status = asyncio.run(_run())
321
+ final_status = asyncio.run(_run())
301
322
 
302
323
  response = client.dataset_job_versions(job_id)
303
324
  if not response.ok:
@@ -314,9 +335,9 @@ def show_logs_from_client(client, job_id):
314
335
 
315
336
  exit_code_by_status = {
316
337
  "FAILED": 1,
317
- "CANCELLED": 2,
338
+ "CANCELED": 2,
318
339
  }
319
- return exit_code_by_status.get(latest_status.upper(), 0) if latest_status else 0
340
+ return exit_code_by_status.get(final_status.upper(), 0) if final_status else 0
320
341
 
321
342
 
322
343
  def create_job(
@@ -334,6 +355,7 @@ def create_job(
334
355
  cluster: Optional[str] = None,
335
356
  start_time: Optional[str] = None,
336
357
  cron: Optional[str] = None,
358
+ no_wait: Optional[bool] = False,
337
359
  ):
338
360
  query_type = "PYTHON" if query_file.endswith(".py") else "SHELL"
339
361
  with open(query_file) as f:
@@ -388,7 +410,7 @@ def create_job(
388
410
  print("Open the job in Studio at", response.data.get("job", {}).get("url"))
389
411
  print("=" * 40)
390
412
 
391
- return show_logs_from_client(client, job_id)
413
+ return 0 if no_wait else show_logs_from_client(client, job_id)
392
414
 
393
415
 
394
416
  def upload_files(client: StudioClient, files: list[str]) -> list[str]:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datachain
3
- Version: 0.27.0
3
+ Version: 0.28.0
4
4
  Summary: Wrangle unstructured AI data at scale
5
5
  Author-email: Dmitry Petrov <support@dvc.org>
6
6
  License-Expression: Apache-2.0
@@ -45,7 +45,7 @@ Requires-Dist: datamodel-code-generator>=0.25
45
45
  Requires-Dist: Pillow<12,>=10.0.0
46
46
  Requires-Dist: msgpack<2,>=1.0.4
47
47
  Requires-Dist: psutil
48
- Requires-Dist: huggingface_hub
48
+ Requires-Dist: huggingface_hub<0.34.0
49
49
  Requires-Dist: iterative-telemetry>=0.0.10
50
50
  Requires-Dist: platformdirs
51
51
  Requires-Dist: dvc-studio-client<1,>=0.21
@@ -17,7 +17,7 @@ datachain/project.py,sha256=90D4GpJSA3t0fayYZbzrL3sk4U7EJhQo8psnWvdI7_o,2280
17
17
  datachain/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
18
  datachain/script_meta.py,sha256=V-LaFOZG84pD0Zc0NvejYdzwDgzITv6yHvAHggDCnuY,4978
19
19
  datachain/semver.py,sha256=UB8GHPBtAP3UJGeiuJoInD7SK-DnB93_Xd1qy_CQ9cU,2074
20
- datachain/studio.py,sha256=RCpVZdHRX-ClEddXaAsZDGFy5o-SOqVCa5NhLj8337s,14486
20
+ datachain/studio.py,sha256=-BmKLVNBLPFveUgVVE2So3aaiGndO2jK2qbHZ0zBDd8,15239
21
21
  datachain/telemetry.py,sha256=0A4IOPPp9VlP5pyW9eBfaTK3YhHGzHl7dQudQjUAx9A,994
22
22
  datachain/utils.py,sha256=DNqOi-Ydb7InyWvD9m7_yailxz6-YGpZzh00biQaHNo,15305
23
23
  datachain/catalog/__init__.py,sha256=cMZzSz3VoUi-6qXSVaHYN-agxQuAcz2XSqnEPZ55crE,353
@@ -35,7 +35,7 @@ datachain/cli/commands/misc.py,sha256=c0DmkOLwcDI2YhA8ArOuLJk6aGzSMZCiKL_E2JGibV
35
35
  datachain/cli/commands/query.py,sha256=Xzfgh14nPVH-sclqX1tpZqgfdTugw5s_44v0D33z6FA,1505
36
36
  datachain/cli/commands/show.py,sha256=Cf8wBs12h-xtdOzjU5GTDy2C8rF5HJSF0hDJYER1zH8,1606
37
37
  datachain/cli/parser/__init__.py,sha256=NPB6ssP4CCt7G1SWZ_8oNQEH2C1lktWgkyHYXDQJZNc,15073
38
- datachain/cli/parser/job.py,sha256=iytBZaCcQUhaOcRlYZFeAJsscN2T2XcEY7MibTeuZhg,5786
38
+ datachain/cli/parser/job.py,sha256=g6ozI3pnV0ly79L7M9mikCeYTPgKlG5gR0D144R82tk,5928
39
39
  datachain/cli/parser/studio.py,sha256=Bo__LKM7qhJGgkyX8M_bCvgZ2Gvqq6r_X4t1NdtaBIY,3881
40
40
  datachain/cli/parser/utils.py,sha256=rETdD-9Hq9A4OolgfT7jQw4aoawtbfmkdtH6E7nkhpI,2888
41
41
  datachain/client/__init__.py,sha256=1kDpCPoibMXi1gExR4lTLc5pi-k6M5TANiwtXkPoLhU,49
@@ -48,7 +48,7 @@ datachain/client/local.py,sha256=0J52Wzvw25hSucVlzBvLuMRAZwrAHZAYDvD1mNBqf4c,460
48
48
  datachain/client/s3.py,sha256=6DNVGLg-woPS1DVlYVX2rIlunNblsuxyOnI1rSzhW3k,7515
49
49
  datachain/data_storage/__init__.py,sha256=9Wit-oe5P46V7CJQTD0BJ5MhOa2Y9h3ddJ4VWTe-Lec,273
50
50
  datachain/data_storage/db_engine.py,sha256=n8ojCbvVMPY2e3SG8fUaaD0b9GkVfpl_Naa_6EiHfWg,3788
51
- datachain/data_storage/job.py,sha256=9r0OGwh22bHNIvLHqg8_-eJSP1YYB-BN5HOla5TdCxw,402
51
+ datachain/data_storage/job.py,sha256=ZkeXCNUj_VCkoKYx29hqB4AcfVUielnRjY-GYUcUxt4,426
52
52
  datachain/data_storage/metastore.py,sha256=Qw332arvhgXB4UY0yX-Hu8Vgl3smU12l6bvxrL9Q-vo,53810
53
53
  datachain/data_storage/schema.py,sha256=o3JbURKXRg3IJyIVA4QjHHkn6byRuz7avbydU2FlvNY,9897
54
54
  datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2krunWUum5o,927
@@ -158,9 +158,9 @@ datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR
158
158
  datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
159
159
  datachain/toolkit/split.py,sha256=ktGWzY4kyzjWyR86dhvzw-Zhl0lVk_LOX3NciTac6qo,2914
160
160
  datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
161
- datachain-0.27.0.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
162
- datachain-0.27.0.dist-info/METADATA,sha256=PWZ_EWTpk1OvWlQZe__5SCjFem6BD1AtYmTxJ5wV3iY,13759
163
- datachain-0.27.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
164
- datachain-0.27.0.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
165
- datachain-0.27.0.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
166
- datachain-0.27.0.dist-info/RECORD,,
161
+ datachain-0.28.0.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
162
+ datachain-0.28.0.dist-info/METADATA,sha256=lA3lv9RX2NeQPobrEjoEbAwg5K3zmnAnbDJ_hjR8KLw,13766
163
+ datachain-0.28.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
164
+ datachain-0.28.0.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
165
+ datachain-0.28.0.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
166
+ datachain-0.28.0.dist-info/RECORD,,