datachain 0.17.0__py3-none-any.whl → 0.17.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of datachain might be problematic.

@@ -83,6 +83,36 @@ def add_jobs_parser(subparsers, parent_parser) -> None:
         help="Python package requirements",
     )
 
+    studio_ls_help = "List jobs in Studio"
+    studio_ls_description = "List jobs in Studio."
+
+    studio_ls_parser = jobs_subparser.add_parser(
+        "ls",
+        parents=[parent_parser],
+        description=studio_ls_description,
+        help=studio_ls_help,
+        formatter_class=CustomHelpFormatter,
+    )
+
+    studio_ls_parser.add_argument(
+        "--status",
+        action="store",
+        help="Status to filter jobs by",
+    )
+
+    studio_ls_parser.add_argument(
+        "--team",
+        action="store",
+        default=None,
+        help="Team to list jobs for (default: from config)",
+    )
+    studio_ls_parser.add_argument(
+        "--limit",
+        type=int,
+        default=20,
+        help="Limit the number of jobs returned (default: 20)",
+    )
+
     studio_cancel_help = "Cancel a job in Studio"
     studio_cancel_description = "Cancel a running job in Studio."
 
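The hunk above (in the jobs CLI parser, per the changed datachain/cli/parser/job.py hash further down) registers a new `ls` subcommand with `--status`, `--team`, and `--limit` options. A minimal argparse sketch of the resulting interface, standalone rather than DataChain's actual wiring:

    # Sketch only: a bare parser that mirrors the options added above.
    import argparse

    parser = argparse.ArgumentParser(prog="datachain job ls")
    parser.add_argument("--status", action="store", help="Status to filter jobs by")
    parser.add_argument("--team", action="store", default=None, help="Team to list jobs for")
    parser.add_argument("--limit", type=int, default=20, help="Max number of jobs returned")

    args = parser.parse_args(["--status", "FAILED", "--limit", "5"])
    print(args.status, args.team, args.limit)  # FAILED None 5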
@@ -258,7 +258,7 @@ class AbstractWarehouse(ABC, Serializable):
         if Client.is_data_source_uri(dataset_name):
             # for datasets that are created for bucket listing we use different prefix
             prefix = self.DATASET_SOURCE_TABLE_PREFIX
-        return f"{prefix}{dataset_name}_{version}"
+        return f"{prefix}{dataset_name}_{version.replace('.', '_')}"
 
     def temp_table_name(self) -> str:
         return self.TMP_TABLE_NAME_PREFIX + _random_string(6)
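The warehouse change only sanitizes the version before embedding it in the table name, replacing dots with underscores so a semver string such as `1.0.1` does not put dots into the table identifier. A rough before/after sketch, with an illustrative prefix and dataset name (the real prefix comes from the warehouse's table-prefix constants, e.g. DATASET_SOURCE_TABLE_PREFIX above):

    # Illustrative values only.
    prefix = "ds_"
    dataset_name = "my_dataset"
    version = "1.0.1"

    old_name = f"{prefix}{dataset_name}_{version}"                    # ds_my_dataset_1.0.1
    new_name = f"{prefix}{dataset_name}_{version.replace('.', '_')}"  # ds_my_dataset_1_0_1
    print(old_name, new_name)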
@@ -42,6 +42,8 @@ def read_values(
     def _func_fr() -> Iterator[tuple_type]:  # type: ignore[valid-type]
         yield from tuples
 
+    _func_fr.__name__ = "read_values"
+
     chain = read_records(
         DataChain.DEFAULT_FILE_RECORD,
         session=session,
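Giving the inner generator an explicit `__name__` means anything downstream that reports or keys off the UDF's function name (progress output, generated step names) presumably sees `read_values` rather than the private helper name. A toy illustration of what the assignment changes:

    def _func_fr():
        yield from [1, 2, 3]

    print(_func_fr.__name__)  # _func_fr
    _func_fr.__name__ = "read_values"
    print(_func_fr.__name__)  # read_values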
@@ -203,9 +203,10 @@ class UDFDispatcher:
        def get_inputs() -> Iterable["RowsOutput"]:
            warehouse = self.catalog.warehouse.clone()
            if ids_only:
-                yield from warehouse.dataset_rows_select_from_ids(
-                    self.query, input_rows, self.is_batching
-                )
+                for ids in batched(input_rows, DEFAULT_BATCH_SIZE):
+                    yield from warehouse.dataset_rows_select_from_ids(
+                        self.query, ids, self.is_batching
+                    )
            else:
                yield from input_rows
 
@@ -425,8 +426,9 @@ class UDFWorker:
        warehouse = self.catalog.warehouse.clone()
        while (batch := get_from_queue(self.task_queue)) != STOP_SIGNAL:
            if ids_only:
-                yield from warehouse.dataset_rows_select_from_ids(
-                    self.query, batch, self.is_batching
-                )
+                for ids in batched(batch, DEFAULT_BATCH_SIZE):
+                    yield from warehouse.dataset_rows_select_from_ids(
+                        self.query, ids, self.is_batching
+                    )
            else:
                yield from batch
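Both the dispatcher and worker hunks replace a single `dataset_rows_select_from_ids` call with a loop over `batched(..., DEFAULT_BATCH_SIZE)`, so a very large id collection is fetched in fixed-size chunks instead of one oversized query. A self-contained sketch of that chunking pattern; `batched`, `DEFAULT_BATCH_SIZE`, and the select call are stand-ins here, not DataChain's implementations:

    from itertools import islice
    from typing import Iterable, Iterator, Sequence

    DEFAULT_BATCH_SIZE = 2000  # assumed value, for illustration only

    def batched(items: Iterable[int], size: int) -> Iterator[list[int]]:
        # Yield consecutive chunks of at most `size` items.
        it = iter(items)
        while chunk := list(islice(it, size)):
            yield chunk

    def select_rows(ids: Sequence[int]) -> list[str]:
        # Stand-in for warehouse.dataset_rows_select_from_ids(query, ids, is_batching).
        return [f"row-{i}" for i in ids]

    def get_inputs(input_rows: Iterable[int]) -> Iterator[str]:
        for ids in batched(input_rows, DEFAULT_BATCH_SIZE):
            yield from select_rows(ids)

    print(sum(1 for _ in get_inputs(range(5000))))  # 5000 rows, fetched in 3 chunks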
@@ -29,7 +29,7 @@ DatasetExportStatus = Optional[dict[str, Any]]
 DatasetExportSignedUrls = Optional[list[str]]
 FileUploadData = Optional[dict[str, Any]]
 JobData = Optional[dict[str, Any]]
-
+JobListData = dict[str, Any]
 logger = logging.getLogger("datachain")
 
 DATASET_ROWS_CHUNK_SIZE = 8192
@@ -402,6 +402,17 @@ class StudioClient:
         }
         return self._send_request("datachain/job", data)
 
+    def get_jobs(
+        self,
+        status: Optional[str] = None,
+        limit: int = 20,
+    ) -> Response[JobListData]:
+        return self._send_request(
+            "datachain/jobs",
+            {"status": status, "limit": limit} if status else {"limit": limit},
+            method="GET",
+        )
+
     def cancel_job(
         self,
         job_id: str,
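The new `get_jobs` method issues a GET to the `datachain/jobs` endpoint and only includes `status` in the payload when one is given. A hedged usage sketch; the team name is made up, and the `jobs` key in the response is inferred from `list_jobs` below rather than from documented API:

    # Requires a configured Studio token; "my-team" is a placeholder.
    from datachain.remote.studio import StudioClient

    client = StudioClient(team="my-team")
    response = client.get_jobs(status="running", limit=5)
    if not response.ok:
        raise RuntimeError(response.message)
    for job in (response.data or {}).get("jobs", []):
        print(job.get("id"), job.get("status"))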
datachain/studio.py CHANGED
@@ -3,6 +3,8 @@ import os
 import sys
 from typing import TYPE_CHECKING, Optional
 
+import tabulate
+
 from datachain.config import Config, ConfigLevel
 from datachain.dataset import QUERY_DATASET_PREFIX
 from datachain.error import DataChainError
@@ -44,6 +46,10 @@ def process_jobs_args(args: "Namespace"):
         return cancel_job(args.id, args.team)
     if args.cmd == "logs":
         return show_job_logs(args.id, args.team)
+
+    if args.cmd == "ls":
+        return list_jobs(args.status, args.team, args.limit)
+
     raise DataChainError(f"Unknown command '{args.cmd}'.")
 
 
@@ -240,13 +246,13 @@ def show_logs_from_client(client, job_id):
         raise DataChainError(response.message)
 
     response_data = response.data
-    if response_data:
+    if response_data and response_data.get("dataset_versions"):
         dataset_versions = response_data.get("dataset_versions", [])
         print("\n\n>>>> Dataset versions created during the job:")
         for version in dataset_versions:
             print(f" - {version.get('dataset_name')}@v{version.get('version')}")
     else:
-        print("No dataset versions created during the job.")
+        print("\n\nNo dataset versions created during the job.")
 
 
 def create_job(
@@ -337,6 +343,31 @@ def cancel_job(job_id: str, team_name: Optional[str]):
     print(f"Job {job_id} canceled")
 
 
+def list_jobs(status: Optional[str], team_name: Optional[str], limit: int):
+    client = StudioClient(team=team_name)
+    response = client.get_jobs(status, limit)
+    if not response.ok:
+        raise DataChainError(response.message)
+
+    jobs = response.data.get("jobs", [])
+    if not jobs:
+        print("No jobs found")
+        return
+
+    rows = [
+        {
+            "ID": job.get("id"),
+            "Name": job.get("name"),
+            "Status": job.get("status"),
+            "Created at": job.get("created_at"),
+            "Created by": job.get("created_by"),
+        }
+        for job in jobs
+    ]
+
+    print(tabulate.tabulate(rows, headers="keys", tablefmt="grid"))
+
+
 def show_job_logs(job_id: str, team_name: Optional[str]):
     token = Config().read().get("studio", {}).get("token")
     if not token:
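The table output leans on `tabulate` with `headers="keys"`, so the dict keys built above become column headers. A small standalone example of the same call with made-up job rows:

    import tabulate

    rows = [
        {"ID": "abc123", "Name": "my-job", "Status": "COMPLETE",
         "Created at": "2025-05-09T10:00:00Z", "Created by": "alice"},
        {"ID": "def456", "Name": "nightly", "Status": "RUNNING",
         "Created at": "2025-05-09T11:30:00Z", "Created by": "bob"},
    ]
    print(tabulate.tabulate(rows, headers="keys", tablefmt="grid"))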
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: datachain
-Version: 0.17.0
+Version: 0.17.2
 Summary: Wrangle unstructured AI data at scale
 Author-email: Dmitry Petrov <support@dvc.org>
 License-Expression: Apache-2.0
@@ -44,7 +44,7 @@ Requires-Dist: datamodel-code-generator>=0.25
 Requires-Dist: Pillow<12,>=10.0.0
 Requires-Dist: msgpack<2,>=1.0.4
 Requires-Dist: psutil
-Requires-Dist: huggingface_hub
+Requires-Dist: huggingface_hub<0.31
 Requires-Dist: iterative-telemetry>=0.0.10
 Requires-Dist: platformdirs
 Requires-Dist: dvc-studio-client<1,>=0.21
@@ -14,7 +14,7 @@ datachain/progress.py,sha256=lRzxoYP4Qv2XBwD78sOkmYRzHFpZ2ExVNJF8wAeICtY,770
 datachain/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datachain/script_meta.py,sha256=V-LaFOZG84pD0Zc0NvejYdzwDgzITv6yHvAHggDCnuY,4978
 datachain/semver.py,sha256=t_3Y5OGLEthrstBwuwrf5pXVquEuRFu3ZoGe3ajfJB8,1715
-datachain/studio.py,sha256=kUNm7iUSMKWIkZs8w_b9VrcK45YrsY91dEe6kPz8AWE,10113
+datachain/studio.py,sha256=5MSDk-pM2Na0TlK0eCStmZ47hNBvlY2WPKzCQFmBPpM,10948
 datachain/telemetry.py,sha256=0A4IOPPp9VlP5pyW9eBfaTK3YhHGzHl7dQudQjUAx9A,994
 datachain/utils.py,sha256=DNqOi-Ydb7InyWvD9m7_yailxz6-YGpZzh00biQaHNo,15305
 datachain/catalog/__init__.py,sha256=cMZzSz3VoUi-6qXSVaHYN-agxQuAcz2XSqnEPZ55crE,353
@@ -32,7 +32,7 @@ datachain/cli/commands/misc.py,sha256=c0DmkOLwcDI2YhA8ArOuLJk6aGzSMZCiKL_E2JGibV
 datachain/cli/commands/query.py,sha256=Xzfgh14nPVH-sclqX1tpZqgfdTugw5s_44v0D33z6FA,1505
 datachain/cli/commands/show.py,sha256=Cf8wBs12h-xtdOzjU5GTDy2C8rF5HJSF0hDJYER1zH8,1606
 datachain/cli/parser/__init__.py,sha256=sjCIcosAtZqa0m50GMQHqmCkZSYxKyZNwQ29XwRQlP0,15913
-datachain/cli/parser/job.py,sha256=1TfM8qkGn0e-zixnn40Z5-Pv9h_mZncPaDAOyzjIf5k,3601
+datachain/cli/parser/job.py,sha256=dpRY6KQgooCXhQGQiebPuNuGouKk5jclkpafpQ2TDvQ,4373
 datachain/cli/parser/studio.py,sha256=Y-1OlQGecLVi9QofvWUfSlPd2ISyaESf7QFGZqGsrdw,3609
 datachain/cli/parser/utils.py,sha256=rETdD-9Hq9A4OolgfT7jQw4aoawtbfmkdtH6E7nkhpI,2888
 datachain/client/__init__.py,sha256=1kDpCPoibMXi1gExR4lTLc5pi-k6M5TANiwtXkPoLhU,49
@@ -50,7 +50,7 @@ datachain/data_storage/metastore.py,sha256=vo2ab-U_-BKfeFYTmvpbCoMyMZEVxrVqM9Djj
 datachain/data_storage/schema.py,sha256=asZYz1cg_WKfe2Q-k5W51E2z2CzHU5B4QEDZDMFr8yo,9346
 datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2krunWUum5o,927
 datachain/data_storage/sqlite.py,sha256=bwZAB_NUMT2WMv5tPQnnLFA0P-PiQtxzSaQ1q6xDxOU,24590
-datachain/data_storage/warehouse.py,sha256=3MFurg3Gm24xalWI03e_uHkznPxfhzTXjX7AhrKMiXQ,31516
+datachain/data_storage/warehouse.py,sha256=RkdX1cunfmpDkRYRdOGNy0kLw7RekIokVl3Dd0i-hrA,31534
 datachain/diff/__init__.py,sha256=YkGdiDbZIMhAZ2SJ4eSe00HU67VP1P6SL2L_t0ODYMs,9425
 datachain/fs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datachain/fs/reference.py,sha256=A8McpXF0CqbXPqanXuvpKu50YLB3a2ZXA3YAPxtBXSM,914
@@ -108,7 +108,7 @@ datachain/lib/dc/parquet.py,sha256=zYcSgrWwyEDW9UxGUSVdIVsCu15IGEf0xL8KfWQqK94,1
 datachain/lib/dc/records.py,sha256=J1I69J2gFIBjRTGr2LG-5qn_rTVzRLcr2y3tVDrmHdg,3068
 datachain/lib/dc/storage.py,sha256=Z0rqBNoss8RVHo8I3Bpc5hJBLTJfmJHRbV0SA_tR5LI,5635
 datachain/lib/dc/utils.py,sha256=VawOAlJSvAtZbsMg33s5tJe21TRx1Km3QggI1nN6tnw,3984
-datachain/lib/dc/values.py,sha256=cBQubhmPNEDMJldUXzGh-UKbdim4P6O2B91Gp39roKw,1389
+datachain/lib/dc/values.py,sha256=7l1n352xWrEdql2NhBcZ3hj8xyPglWiY4qHjFPjn6iw,1428
 datachain/model/__init__.py,sha256=R9faX5OHV1xh2EW-g2MPedwbtEqt3LodJRyluB-QylI,189
 datachain/model/bbox.py,sha256=cQNHuQuVsh6bW3n3Hj40F2Cc20cExQ9Lg_q7R2jxUMI,9324
 datachain/model/pose.py,sha256=rjquA6M-I-Y30Xm6YSkGv1OY52hJZmR2AuxbIpE5uD0,3865
@@ -121,7 +121,7 @@ datachain/model/ultralytics/segment.py,sha256=koq1HASo29isf0in6oSlzmU4IzsmOXe87F
 datachain/query/__init__.py,sha256=7DhEIjAA8uZJfejruAVMZVcGFmvUpffuZJwgRqNwe-c,263
 datachain/query/batch.py,sha256=-goxLpE0EUvaDHu66rstj53UnfHpYfBUGux8GSpJ93k,4306
 datachain/query/dataset.py,sha256=v9xVYyft06KNvsPrA-j59krZXwRAunKvyYyJsINZpxM,59114
-datachain/query/dispatch.py,sha256=xNAZ5HNx48vY8Azhh3j-H_WSeAlRTQKNWKJbOAEhyPY,15398
+datachain/query/dispatch.py,sha256=15M3zlTUFKM6D2ijITX4o5QxCkRe2klkODsIDi3aQOg,15544
 datachain/query/metrics.py,sha256=DOK5HdNVaRugYPjl8qnBONvTkwjMloLqAr7Mi3TjCO0,858
 datachain/query/params.py,sha256=O_j89mjYRLOwWNhYZl-z7mi-rkdP7WyFmaDufsdTryE,863
 datachain/query/queue.py,sha256=v0UeK4ilmdiRoJ5OdjB5qpnHTYDxRP4vhVp5Iw_toaI,3512
@@ -130,7 +130,7 @@ datachain/query/session.py,sha256=3nyOvPmLiA86IdHc3BL6Dt_himtHVvaDz_I1h3hZ_gI,65
 datachain/query/udf.py,sha256=e753bDJzTNjGFQn1WGTvOAWSwjDbrFI1-_DDWkWN2ls,1343
 datachain/query/utils.py,sha256=HaSDNH_XGvp_NIcXjcB7j4vJRPi4_tbztDWclYelHY4,1208
 datachain/remote/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datachain/remote/studio.py,sha256=opSos4cG7GdPIJtIuNifYHdn-IpWP8lvN49a_rEuB1Y,13187
+datachain/remote/studio.py,sha256=4oypHea2NYB7br_BJTYPAmh3FH8KV835mq01Pwexrrg,13524
 datachain/sql/__init__.py,sha256=6SQRdbljO3d2hx3EAVXEZrHQKv5jth0Jh98PogT59No,262
 datachain/sql/selectable.py,sha256=cTc60qVoAwqqss0Vop8Lt5Z-ROnM1XrQmL_GLjRxhXs,1765
 datachain/sql/types.py,sha256=ASSPkmM5EzdRindqj2O7WHLXq8VHAgFYedG8lYfGvVI,14045
@@ -152,9 +152,9 @@ datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR
 datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
 datachain/toolkit/split.py,sha256=ktGWzY4kyzjWyR86dhvzw-Zhl0lVk_LOX3NciTac6qo,2914
 datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
-datachain-0.17.0.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
-datachain-0.17.0.dist-info/METADATA,sha256=P5M0UjmMc5FrB37aNkR3qS3hoNwfT8ItegWSAuiOhOU,11331
-datachain-0.17.0.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
-datachain-0.17.0.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
-datachain-0.17.0.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
-datachain-0.17.0.dist-info/RECORD,,
+datachain-0.17.2.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
+datachain-0.17.2.dist-info/METADATA,sha256=JEQUpUwLjYtMDRL9nRObwbo3bnYo2MNgQVomGf-JBpI,11336
+datachain-0.17.2.dist-info/WHEEL,sha256=DnLRTWE75wApRYVsjgc6wsVswC54sMSJhAEd4xhDpBk,91
+datachain-0.17.2.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
+datachain-0.17.2.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
+datachain-0.17.2.dist-info/RECORD,,
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (80.3.1)
+Generator: setuptools (80.4.0)
 Root-Is-Purelib: true
 Tag: py3-none-any
 