datachain-0.30.7-py3-none-any.whl → datachain-0.31.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- datachain/remote/studio.py +60 -17
- datachain/studio.py +7 -8
- {datachain-0.30.7.dist-info → datachain-0.31.0.dist-info}/METADATA +1 -1
- {datachain-0.30.7.dist-info → datachain-0.31.0.dist-info}/RECORD +8 -8
- {datachain-0.30.7.dist-info → datachain-0.31.0.dist-info}/WHEEL +0 -0
- {datachain-0.30.7.dist-info → datachain-0.31.0.dist-info}/entry_points.txt +0 -0
- {datachain-0.30.7.dist-info → datachain-0.31.0.dist-info}/licenses/LICENSE +0 -0
- {datachain-0.30.7.dist-info → datachain-0.31.0.dist-info}/top_level.txt +0 -0
datachain/remote/studio.py
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import base64
|
|
2
1
|
import json
|
|
3
2
|
import logging
|
|
4
3
|
import os
|
|
@@ -7,6 +6,7 @@ from datetime import datetime, timedelta, timezone
|
|
|
7
6
|
from struct import unpack
|
|
8
7
|
from typing import (
|
|
9
8
|
Any,
|
|
9
|
+
BinaryIO,
|
|
10
10
|
Generic,
|
|
11
11
|
Optional,
|
|
12
12
|
TypeVar,
|
|
@@ -30,8 +30,9 @@ DatasetExportStatus = Optional[dict[str, Any]]
|
|
|
30
30
|
DatasetExportSignedUrls = Optional[list[str]]
|
|
31
31
|
FileUploadData = Optional[dict[str, Any]]
|
|
32
32
|
JobData = Optional[dict[str, Any]]
|
|
33
|
-
JobListData = dict[str, Any]
|
|
34
|
-
ClusterListData = dict[str, Any]
|
|
33
|
+
JobListData = list[dict[str, Any]]
|
|
34
|
+
ClusterListData = list[dict[str, Any]]
|
|
35
|
+
|
|
35
36
|
logger = logging.getLogger("datachain")
|
|
36
37
|
|
|
37
38
|
DATASET_ROWS_CHUNK_SIZE = 8192
|
|
@@ -239,6 +240,45 @@ class StudioClient:
|
|
|
239
240
|
|
|
240
241
|
return Response(data, ok, message, response.status_code)
|
|
241
242
|
|
|
243
|
+
def _send_multipart_request(
|
|
244
|
+
self, route: str, files: dict[str, Any], params: Optional[dict[str, Any]] = None
|
|
245
|
+
) -> Response[Any]:
|
|
246
|
+
"""
|
|
247
|
+
Function that communicates with Studio API using multipart/form-data.
|
|
248
|
+
It will raise an exception, and try to retry, if 5xx status code is
|
|
249
|
+
returned, or if Timeout exceptions is thrown from the requests lib
|
|
250
|
+
"""
|
|
251
|
+
import requests
|
|
252
|
+
|
|
253
|
+
# Add team_name to params
|
|
254
|
+
request_params = {**(params or {}), "team_name": self.team}
|
|
255
|
+
|
|
256
|
+
response = requests.post(
|
|
257
|
+
url=f"{self.url}/{route}",
|
|
258
|
+
files=files,
|
|
259
|
+
params=request_params,
|
|
260
|
+
headers={
|
|
261
|
+
"Authorization": f"token {self.token}",
|
|
262
|
+
},
|
|
263
|
+
timeout=self.timeout,
|
|
264
|
+
)
|
|
265
|
+
|
|
266
|
+
ok = response.ok
|
|
267
|
+
try:
|
|
268
|
+
data = json.loads(response.content.decode("utf-8"))
|
|
269
|
+
except json.decoder.JSONDecodeError:
|
|
270
|
+
data = {}
|
|
271
|
+
|
|
272
|
+
if not ok:
|
|
273
|
+
if response.status_code == 403:
|
|
274
|
+
message = f"Not authorized for the team {self.team}"
|
|
275
|
+
else:
|
|
276
|
+
message = data.get("message", "")
|
|
277
|
+
else:
|
|
278
|
+
message = ""
|
|
279
|
+
|
|
280
|
+
return Response(data, ok, message, response.status_code)
|
|
281
|
+
|
|
242
282
|
@staticmethod
|
|
243
283
|
def _unpacker_hook(code, data):
|
|
244
284
|
import msgpack
|
|
@@ -409,12 +449,13 @@ class StudioClient:
|
|
|
409
449
|
method="GET",
|
|
410
450
|
)
|
|
411
451
|
|
|
412
|
-
def upload_file(
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
}
|
|
417
|
-
|
|
452
|
+
def upload_file(
|
|
453
|
+
self, file_obj: BinaryIO, file_name: str
|
|
454
|
+
) -> Response[FileUploadData]:
|
|
455
|
+
# Prepare multipart form data
|
|
456
|
+
files = {"file": (file_name, file_obj, "application/octet-stream")}
|
|
457
|
+
|
|
458
|
+
return self._send_multipart_request("datachain/jobs/files", files)
|
|
418
459
|
|
|
419
460
|
def create_job(
|
|
420
461
|
self,
|
|
@@ -449,25 +490,27 @@ class StudioClient:
|
|
|
449
490
|
"cron_expression": cron,
|
|
450
491
|
"credentials_name": credentials_name,
|
|
451
492
|
}
|
|
452
|
-
return self._send_request("datachain/
|
|
493
|
+
return self._send_request("datachain/jobs/", data)
|
|
453
494
|
|
|
454
495
|
def get_jobs(
|
|
455
496
|
self,
|
|
456
497
|
status: Optional[str] = None,
|
|
457
498
|
limit: int = 20,
|
|
499
|
+
job_id: Optional[str] = None,
|
|
458
500
|
) -> Response[JobListData]:
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
501
|
+
params: dict[str, Any] = {"limit": limit}
|
|
502
|
+
if status is not None:
|
|
503
|
+
params["status"] = status
|
|
504
|
+
if job_id is not None:
|
|
505
|
+
params["job_id"] = job_id
|
|
506
|
+
return self._send_request("datachain/jobs/", params, method="GET")
|
|
464
507
|
|
|
465
508
|
def cancel_job(
|
|
466
509
|
self,
|
|
467
510
|
job_id: str,
|
|
468
511
|
) -> Response[JobData]:
|
|
469
|
-
url = f"datachain/
|
|
512
|
+
url = f"datachain/jobs/{job_id}/cancel"
|
|
470
513
|
return self._send_request(url, data={}, method="POST")
|
|
471
514
|
|
|
472
515
|
def get_clusters(self) -> Response[ClusterListData]:
|
|
473
|
-
return self._send_request("datachain/clusters", {}, method="GET")
|
|
516
|
+
return self._send_request("datachain/clusters/", {}, method="GET")
|
datachain/studio.py
CHANGED
|
@@ -403,14 +403,14 @@ def create_job(
|
|
|
403
403
|
if not response.data:
|
|
404
404
|
raise DataChainError("Failed to create job")
|
|
405
405
|
|
|
406
|
-
job_id = response.data.get("
|
|
406
|
+
job_id = response.data.get("id")
|
|
407
407
|
|
|
408
408
|
if parsed_start_time or cron:
|
|
409
409
|
print(f"Job {job_id} is scheduled as a task in Studio.")
|
|
410
410
|
return 0
|
|
411
411
|
|
|
412
412
|
print(f"Job {job_id} created")
|
|
413
|
-
print("Open the job in Studio at", response.data.get("
|
|
413
|
+
print("Open the job in Studio at", response.data.get("url"))
|
|
414
414
|
print("=" * 40)
|
|
415
415
|
|
|
416
416
|
return 0 if no_wait else show_logs_from_client(client, job_id)
|
|
@@ -421,16 +421,14 @@ def upload_files(client: StudioClient, files: list[str]) -> list[str]:
|
|
|
421
421
|
for file in files:
|
|
422
422
|
file_name = os.path.basename(file)
|
|
423
423
|
with open(file, "rb") as f:
|
|
424
|
-
|
|
425
|
-
response = client.upload_file(file_content, file_name)
|
|
424
|
+
response = client.upload_file(f, file_name)
|
|
426
425
|
if not response.ok:
|
|
427
426
|
raise DataChainError(response.message)
|
|
428
427
|
|
|
429
428
|
if not response.data:
|
|
430
429
|
raise DataChainError(f"Failed to upload file {file_name}")
|
|
431
430
|
|
|
432
|
-
file_id
|
|
433
|
-
if file_id:
|
|
431
|
+
if file_id := response.data.get("id"):
|
|
434
432
|
file_ids.append(str(file_id))
|
|
435
433
|
return file_ids
|
|
436
434
|
|
|
@@ -456,7 +454,7 @@ def list_jobs(status: Optional[str], team_name: Optional[str], limit: int):
|
|
|
456
454
|
if not response.ok:
|
|
457
455
|
raise DataChainError(response.message)
|
|
458
456
|
|
|
459
|
-
jobs = response.data
|
|
457
|
+
jobs = response.data or []
|
|
460
458
|
if not jobs:
|
|
461
459
|
print("No jobs found")
|
|
462
460
|
return
|
|
@@ -492,7 +490,7 @@ def list_clusters(team_name: Optional[str]):
|
|
|
492
490
|
if not response.ok:
|
|
493
491
|
raise DataChainError(response.message)
|
|
494
492
|
|
|
495
|
-
clusters = response.data
|
|
493
|
+
clusters = response.data or []
|
|
496
494
|
if not clusters:
|
|
497
495
|
print("No clusters found")
|
|
498
496
|
return
|
|
@@ -505,6 +503,7 @@ def list_clusters(team_name: Optional[str]):
|
|
|
505
503
|
"Cloud Provider": cluster.get("cloud_provider"),
|
|
506
504
|
"Cloud Credentials": cluster.get("cloud_credentials"),
|
|
507
505
|
"Is Active": cluster.get("is_active"),
|
|
506
|
+
"Is Default": cluster.get("default"),
|
|
508
507
|
"Max Workers": cluster.get("max_workers"),
|
|
509
508
|
}
|
|
510
509
|
for cluster in clusters
|
|
@@ -17,7 +17,7 @@ datachain/project.py,sha256=90D4GpJSA3t0fayYZbzrL3sk4U7EJhQo8psnWvdI7_o,2280
|
|
|
17
17
|
datachain/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
18
18
|
datachain/script_meta.py,sha256=V-LaFOZG84pD0Zc0NvejYdzwDgzITv6yHvAHggDCnuY,4978
|
|
19
19
|
datachain/semver.py,sha256=UB8GHPBtAP3UJGeiuJoInD7SK-DnB93_Xd1qy_CQ9cU,2074
|
|
20
|
-
datachain/studio.py,sha256=
|
|
20
|
+
datachain/studio.py,sha256=IS8o4BZnhUo73Bd8m4CJxFc5utdmh2miIs25WswkFBA,15283
|
|
21
21
|
datachain/telemetry.py,sha256=0A4IOPPp9VlP5pyW9eBfaTK3YhHGzHl7dQudQjUAx9A,994
|
|
22
22
|
datachain/utils.py,sha256=5ehFeqXau7MFmGUQRsjRyPfDMPoOF1ojpfVciYUo5fE,15659
|
|
23
23
|
datachain/catalog/__init__.py,sha256=9NBaywvAOaXdkyqiHjbBEiXs7JImR1OJsY9r8D5Q16g,403
|
|
@@ -136,7 +136,7 @@ datachain/query/session.py,sha256=gKblltJAVQAVSTswAgWGDgGbpmFlFzFVkIQojDCjgXM,68
|
|
|
136
136
|
datachain/query/udf.py,sha256=jqutTpvkT6eHl96ZEgYiiTMAhI7vmTQA6JH9y4WCibI,1405
|
|
137
137
|
datachain/query/utils.py,sha256=a2PTBZ3qsG6XlUcp9XsoGiQfKkca4Q3m-VzFgiGQPAc,1230
|
|
138
138
|
datachain/remote/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
139
|
-
datachain/remote/studio.py,sha256=
|
|
139
|
+
datachain/remote/studio.py,sha256=amjcV0B8qumsVBnxPQnt8oSrnfMK2vAdOurVMA9L_zA,16868
|
|
140
140
|
datachain/sql/__init__.py,sha256=8D2omsBiATt8bjLjGo6jBEtaKEkOlnlNFWhVryHMDv0,388
|
|
141
141
|
datachain/sql/postgresql_dialect.py,sha256=pDTfH8xaXz5xZsq8O1aQUvWLRIv_ogYeAqtmKlPp3Rw,280
|
|
142
142
|
datachain/sql/postgresql_types.py,sha256=ryb_0lzuA9UOJ_B6nW9Yb8nJjzeSmEItAL_Ceue65lc,627
|
|
@@ -160,9 +160,9 @@ datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR
|
|
|
160
160
|
datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
|
|
161
161
|
datachain/toolkit/split.py,sha256=ktGWzY4kyzjWyR86dhvzw-Zhl0lVk_LOX3NciTac6qo,2914
|
|
162
162
|
datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
|
|
163
|
-
datachain-0.
|
|
164
|
-
datachain-0.
|
|
165
|
-
datachain-0.
|
|
166
|
-
datachain-0.
|
|
167
|
-
datachain-0.
|
|
168
|
-
datachain-0.
|
|
163
|
+
datachain-0.31.0.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
|
|
164
|
+
datachain-0.31.0.dist-info/METADATA,sha256=hY_KVFdUHZmZcxRiy5e-GY6CXI-sY0oKAtrvNakApdY,13898
|
|
165
|
+
datachain-0.31.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
166
|
+
datachain-0.31.0.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
|
|
167
|
+
datachain-0.31.0.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
|
|
168
|
+
datachain-0.31.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|