datachain 0.19.2__py3-none-any.whl → 0.20.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



datachain/namespace.py ADDED
@@ -0,0 +1,65 @@
+import builtins
+from dataclasses import dataclass, fields
+from datetime import datetime
+from typing import Any, Optional, TypeVar
+
+from datachain.error import InvalidNamespaceNameError
+
+N = TypeVar("N", bound="Namespace")
+NAMESPACE_NAME_RESERVED_CHARS = ["."]
+
+
+@dataclass(frozen=True)
+class Namespace:
+    id: int
+    uuid: str
+    name: str
+    description: Optional[str]
+    created_at: datetime
+
+    @staticmethod
+    def validate_name(name: str) -> None:
+        """Throws exception if name is invalid, otherwise returns None"""
+        if not name:
+            raise InvalidNamespaceNameError("Namespace name cannot be empty")
+
+        for c in NAMESPACE_NAME_RESERVED_CHARS:
+            if c in name:
+                raise InvalidNamespaceNameError(
+                    f"Character {c} is reserved and not allowed in namespace name"
+                )
+
+        if name in [Namespace.default(), Namespace.system()]:
+            raise InvalidNamespaceNameError(
+                f"Namespace name {name} is reserved and cannot be used."
+            )
+
+    @staticmethod
+    def default() -> str:
+        """Name of default namespace"""
+        return "local"
+
+    @staticmethod
+    def system() -> str:
+        """Name of the system namespace"""
+        return "system"
+
+    @property
+    def is_system(self):
+        return self.name == Namespace.system()
+
+    @classmethod
+    def parse(
+        cls: builtins.type[N],
+        id: int,
+        uuid: str,
+        name: str,
+        description: Optional[str],
+        created_at: datetime,
+    ) -> "Namespace":
+        return cls(id, uuid, name, description, created_at)
+
+    @classmethod
+    def from_dict(cls, d: dict[str, Any]) -> "Namespace":
+        kwargs = {f.name: d[f.name] for f in fields(cls) if f.name in d}
+        return cls(**kwargs)
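
As a quick orientation for the new Namespace dataclass above, here is a minimal usage sketch (assuming datachain 0.20.x is installed; the example names are made up). validate_name accepts any non-empty name that does not contain the reserved "." character and rejects the reserved names returned by Namespace.default() and Namespace.system():

from datachain.error import InvalidNamespaceNameError
from datachain.namespace import Namespace

Namespace.validate_name("analytics")  # passes: non-empty, no reserved characters

for bad in ("", "team.analytics", "local", "system"):
    try:
        Namespace.validate_name(bad)
    except InvalidNamespaceNameError as exc:
        print(f"rejected {bad!r}: {exc}")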
datachain/project.py ADDED
@@ -0,0 +1,78 @@
+import builtins
+from dataclasses import dataclass, fields
+from datetime import datetime
+from typing import Any, Optional, TypeVar
+
+from datachain.error import InvalidProjectNameError
+from datachain.namespace import Namespace
+
+P = TypeVar("P", bound="Project")
+PROJECT_NAME_RESERVED_CHARS = ["."]
+
+
+@dataclass(frozen=True)
+class Project:
+    id: int
+    uuid: str
+    name: str
+    description: Optional[str]
+    created_at: datetime
+    namespace: Namespace
+
+    @staticmethod
+    def validate_name(name: str) -> None:
+        """Throws exception if name is invalid, otherwise returns None"""
+        if not name:
+            raise InvalidProjectNameError("Project name cannot be empty")
+
+        for c in PROJECT_NAME_RESERVED_CHARS:
+            if c in name:
+                raise InvalidProjectNameError(
+                    f"Character {c} is reserved and not allowed in project name."
+                )
+
+        if name in [Project.default(), Project.listing()]:
+            raise InvalidProjectNameError(
+                f"Project name {name} is reserved and cannot be used."
+            )
+
+    @staticmethod
+    def default() -> str:
+        """Name of default project"""
+        return "local"
+
+    @staticmethod
+    def listing() -> str:
+        """Name of listing project where all listing datasets will be saved"""
+        return "listing"
+
+    @classmethod
+    def parse(
+        cls: builtins.type[P],
+        namespace_id: int,
+        namespace_uuid: str,
+        namespace_name: str,
+        namespace_description: Optional[str],
+        namespace_created_at: datetime,
+        project_id: int,
+        uuid: str,
+        name: str,
+        description: Optional[str],
+        created_at: datetime,
+        project_namespace_id: int,
+    ) -> "Project":
+        namespace = Namespace.parse(
+            namespace_id,
+            namespace_uuid,
+            namespace_name,
+            namespace_description,
+            namespace_created_at,
+        )
+
+        return cls(project_id, uuid, name, description, created_at, namespace)
+
+    @classmethod
+    def from_dict(cls, d: dict[str, Any]) -> "Project":
+        namespace = Namespace.from_dict(d.pop("namespace"))
+        kwargs = {f.name: d[f.name] for f in fields(cls) if f.name in d}
+        return cls(**kwargs, namespace=namespace)
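
Likewise, a minimal sketch of how the new Project dataclass composes with Namespace through from_dict (the payload below is hypothetical and only shaped to match the dataclass fields defined above):

from datetime import datetime, timezone

from datachain.project import Project

payload = {  # hypothetical values shaped to match the fields above
    "id": 1,
    "uuid": "00000000-0000-0000-0000-000000000001",
    "name": "clip-experiments",
    "description": None,
    "created_at": datetime.now(timezone.utc),
    "namespace": {
        "id": 1,
        "uuid": "00000000-0000-0000-0000-000000000002",
        "name": "research",
        "description": None,
        "created_at": datetime.now(timezone.utc),
    },
}

project = Project.from_dict(payload)
print(project.namespace.name, project.name)  # research clip-experiments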
@@ -41,12 +41,13 @@ from datachain.data_storage.schema import (
     partition_col_names,
     partition_columns,
 )
-from datachain.dataset import DATASET_PREFIX, DatasetDependency, DatasetStatus, RowDict
+from datachain.dataset import DatasetDependency, DatasetStatus, RowDict
 from datachain.error import DatasetNotFoundError, QueryScriptCancelError
 from datachain.func.base import Function
 from datachain.lib.listing import is_listing_dataset, listing_dataset_expired
 from datachain.lib.udf import UDFAdapter, _get_cache
 from datachain.progress import CombinedDownloadCallback, TqdmCombinedDownloadCallback
+from datachain.project import Project
 from datachain.query.schema import C, UDFParamSpec, normalize_param
 from datachain.query.session import Session
 from datachain.query.udf import UdfInfo
@@ -83,7 +84,7 @@ PartitionByType = Union[
     Function, ColumnElement, Sequence[Union[Function, ColumnElement]]
 ]
 JoinPredicateType = Union[str, ColumnClause, ColumnElement]
-DatasetDependencyType = tuple[str, str]
+DatasetDependencyType = tuple["DatasetRecord", str]
 
 logger = logging.getLogger("datachain")
 
@@ -169,18 +170,17 @@ class QueryStep:
     """A query that returns all rows from specific dataset version"""
 
     catalog: "Catalog"
-    dataset_name: str
+    dataset: "DatasetRecord"
     dataset_version: str
 
     def apply(self) -> "StepResult":
         def q(*columns):
             return sqlalchemy.select(*columns)
 
-        dataset = self.catalog.get_dataset(self.dataset_name)
-        dr = self.catalog.warehouse.dataset_rows(dataset, self.dataset_version)
+        dr = self.catalog.warehouse.dataset_rows(self.dataset, self.dataset_version)
 
         return step_result(
-            q, dr.columns, dependencies=[(self.dataset_name, self.dataset_version)]
+            q, dr.columns, dependencies=[(self.dataset, self.dataset_version)]
         )
 
 
@@ -1095,6 +1095,8 @@ class DatasetQuery:
         self,
         name: str,
         version: Optional[str] = None,
+        project_name: Optional[str] = None,
+        namespace_name: Optional[str] = None,
         catalog: Optional["Catalog"] = None,
         session: Optional[Session] = None,
         indexing_column_types: Optional[dict[str, Any]] = None,
@@ -1128,33 +1130,38 @@ class DatasetQuery:
         if version:
             self.version = version
 
-        if is_listing_dataset(name):
-            if version:
-                # this listing dataset should already be listed as we specify
-                # exact version
-                self._set_starting_step(self.catalog.get_dataset(name))
-            else:
-                # not setting query step yet as listing dataset might not exist at
-                # this point
-                self.list_ds_name = name
+        namespace_name = namespace_name or self.catalog.metastore.default_namespace_name
+        project_name = project_name or self.catalog.metastore.default_project_name
+
+        if is_listing_dataset(name) and not version:
+            # not setting query step yet as listing dataset might not exist at
+            # this point
+            self.list_ds_name = name
         elif fallback_to_studio and is_token_set():
             self._set_starting_step(
-                self.catalog.get_dataset_with_remote_fallback(name, version)
+                self.catalog.get_dataset_with_remote_fallback(
+                    name,
+                    namespace_name=namespace_name,
+                    project_name=project_name,
+                    version=version,
+                )
             )
         else:
-            self._set_starting_step(self.catalog.get_dataset(name))
+            project = self.catalog.metastore.get_project(project_name, namespace_name)
+            self._set_starting_step(self.catalog.get_dataset(name, project=project))
 
     def _set_starting_step(self, ds: "DatasetRecord") -> None:
         if not self.version:
             self.version = ds.latest_version
 
-        self.starting_step = QueryStep(self.catalog, ds.name, self.version)
+        self.starting_step = QueryStep(self.catalog, ds, self.version)
 
         # at this point we know our starting dataset so setting up schemas
         self.feature_schema = ds.get_version(self.version).feature_schema
         self.column_types = copy(ds.schema)
         if "sys__id" in self.column_types:
             self.column_types.pop("sys__id")
+        self.project = ds.project
 
     def __iter__(self):
         return iter(self.db_results())
@@ -1162,21 +1169,6 @@ class DatasetQuery:
     def __or__(self, other):
         return self.union(other)
 
-    def pull_dataset(self, name: str, version: Optional[str] = None) -> "DatasetRecord":
-        print("Dataset not found in local catalog, trying to get from studio")
-
-        remote_ds_uri = f"{DATASET_PREFIX}{name}"
-        if version:
-            remote_ds_uri += f"@v{version}"
-
-        self.catalog.pull_dataset(
-            remote_ds_uri=remote_ds_uri,
-            local_ds_name=name,
-            local_ds_version=version,
-        )
-
-        return self.catalog.get_dataset(name)
-
     @staticmethod
     def get_table() -> "TableClause":
         table_name = "".join(
@@ -1657,6 +1649,8 @@ class DatasetQuery:
         workers: Union[bool, int] = False,
         min_task_size: Optional[int] = None,
         partition_by: Optional[PartitionByType] = None,
+        namespace: Optional[str] = None,
+        project: Optional[str] = None,
         cache: bool = False,
     ) -> "Self":
         query = self.clone()
@@ -1676,26 +1670,36 @@ class DatasetQuery:
 
     def _add_dependencies(self, dataset: "DatasetRecord", version: str):
         dependencies: set[DatasetDependencyType] = set()
-        for dep_name, dep_version in self.dependencies:
-            if Session.is_temp_dataset(dep_name):
+        for dep_dataset, dep_dataset_version in self.dependencies:
+            if Session.is_temp_dataset(dep_dataset.name):
                 # temp dataset are created for optimization and they will be removed
                 # afterwards. Therefore, we should not put them as dependencies, but
                 # their own direct dependencies
                 for dep in self.catalog.get_dataset_dependencies(
-                    dep_name, dep_version, indirect=False
+                    dep_dataset.name,
+                    dep_dataset_version,
+                    dep_dataset.project,
+                    indirect=False,
                 ):
                     if dep:
-                        dependencies.add((dep.name, dep.version))
+                        dep_project = self.catalog.metastore.get_project(
+                            dep.project, dep.namespace
+                        )
+                        dependencies.add(
+                            (
+                                self.catalog.get_dataset(dep.name, dep_project),
+                                dep.version,
+                            )
+                        )
             else:
-                dependencies.add((dep_name, dep_version))
+                dependencies.add((dep_dataset, dep_dataset_version))
 
-        for dep_name, dep_version in dependencies:
-            # ds_dependency_name, ds_dependency_version = dependency
+        for dep_dataset, dep_dataset_version in dependencies:
             self.catalog.metastore.add_dataset_dependency(
-                dataset.name,
+                dataset,
                 version,
-                dep_name,
-                dep_version,
+                dep_dataset,
+                dep_dataset_version,
             )
 
     def exec(self) -> "Self":
@@ -1711,6 +1715,7 @@ class DatasetQuery:
         self,
         name: Optional[str] = None,
         version: Optional[str] = None,
+        project: Optional[Project] = None,
         feature_schema: Optional[dict] = None,
         dependencies: Optional[list[DatasetDependency]] = None,
         description: Optional[str] = None,
@@ -1719,8 +1724,13 @@ class DatasetQuery:
         **kwargs,
     ) -> "Self":
         """Save the query as a dataset."""
+        project = project or self.catalog.metastore.default_project
         try:
-            if name and version and self.catalog.get_dataset(name).has_version(version):
+            if (
+                name
+                and version
+                and self.catalog.get_dataset(name, project).has_version(version)
+            ):
                 raise RuntimeError(f"Dataset {name} already has version {version}")
         except DatasetNotFoundError:
             pass
@@ -1745,6 +1755,7 @@ class DatasetQuery:
 
             dataset = self.catalog.create_dataset(
                 name,
+                project,
                 version=version,
                 feature_schema=feature_schema,
                 columns=columns,
@@ -1770,11 +1781,25 @@ class DatasetQuery:
 
             if dependencies:
                 # overriding dependencies
-                self.dependencies = {(dep.name, dep.version) for dep in dependencies}
+                self.dependencies = set()
+                for dep in dependencies:
+                    dep_project = self.catalog.metastore.get_project(
+                        dep.project, dep.namespace
+                    )
+                    self.dependencies.add(
+                        (self.catalog.get_dataset(dep.name, dep_project), dep.version)
+                    )
+
             self._add_dependencies(dataset, version)  # type: ignore [arg-type]
         finally:
             self.cleanup()
-        return self.__class__(name=name, version=version, catalog=self.catalog)
+        return self.__class__(
+            name=name,
+            namespace_name=project.namespace.name,
+            project_name=project.name,
+            version=version,
+            catalog=self.catalog,
+        )
 
     @property
     def is_ordered(self) -> bool:
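
Taken together, the DatasetQuery changes above thread a namespace/project pair through dataset resolution and saving. A rough sketch of the new constructor arguments (the dataset, namespace, and project names are hypothetical, the datachain.query.dataset import path is an assumption, and the dataset must already exist for the lookup to succeed):

from datachain.query.dataset import DatasetQuery  # import path assumed

# Both names are optional; when omitted, the metastore's default
# namespace and project are used (see the constructor hunks above).
query = DatasetQuery(
    name="fashion-images",
    namespace_name="research",
    project_name="clip-experiments",
)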
@@ -108,7 +108,7 @@ class Session:
         prefix = self.get_temp_prefix()
         try:
             for dataset in list(self.catalog.metastore.list_datasets_by_prefix(prefix)):
-                self.catalog.remove_dataset(dataset.name, force=True)
+                self.catalog.remove_dataset(dataset.name, dataset.project, force=True)
         # suppress error when metastore has been reset during testing
         except TableMissingError:
             pass
@@ -17,6 +17,7 @@ import websockets
 from requests.exceptions import HTTPError, Timeout
 
 from datachain.config import Config
+from datachain.dataset import DatasetRecord
 from datachain.error import DataChainError
 from datachain.utils import STUDIO_URL, retry_with_backoff
 
@@ -36,13 +37,33 @@ logger = logging.getLogger("datachain")
 DATASET_ROWS_CHUNK_SIZE = 8192
 
 
+def get_studio_env_variable(name: str) -> Any:
+    """
+    Get the value of a DataChain Studio environment variable.
+    It first checks for the variable prefixed with 'DATACHAIN_STUDIO_',
+    then checks for the deprecated 'DVC_STUDIO_' prefix.
+    If neither is set, it returns the provided default value.
+    """
+    if (value := os.environ.get(f"DATACHAIN_STUDIO_{name}")) is not None:
+        return value
+    if (value := os.environ.get(f"DVC_STUDIO_{name}")) is not None:  # deprecated
+        logger.warning(
+            "Environment variable 'DVC_STUDIO_%s' is deprecated, "
+            "use 'DATACHAIN_STUDIO_%s' instead.",
+            name,
+            name,
+        )
+        return value
+    return None
+
+
 def _is_server_error(status_code: int) -> bool:
     return str(status_code).startswith("5")
 
 
 def is_token_set() -> bool:
     return (
-        bool(os.environ.get("DVC_STUDIO_TOKEN"))
+        bool(get_studio_env_variable("TOKEN"))
         or Config().read().get("studio", {}).get("token") is not None
     )
 
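The new get_studio_env_variable helper gives DATACHAIN_STUDIO_* variables precedence over the deprecated DVC_STUDIO_* ones. A minimal sketch of the lookup order (the datachain.remote.studio import path is an assumption; the values are made up):

import os

from datachain.remote.studio import get_studio_env_variable  # import path assumed

os.environ["DVC_STUDIO_TEAM"] = "old-team"        # deprecated prefix, still honored
os.environ["DATACHAIN_STUDIO_TEAM"] = "new-team"  # preferred prefix

assert get_studio_env_variable("TEAM") == "new-team"  # new prefix wins

del os.environ["DATACHAIN_STUDIO_TEAM"]
assert get_studio_env_variable("TEAM") == "old-team"  # falls back and logs a deprecation warning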
@@ -78,12 +99,12 @@ class StudioClient:
 
     @property
     def token(self) -> str:
-        token = os.environ.get("DVC_STUDIO_TOKEN") or self.config.get("token")
+        token = get_studio_env_variable("TOKEN") or self.config.get("token")
 
         if not token:
             raise DataChainError(
                 "Studio token is not set. Use `datachain auth login` "
-                "or environment variable `DVC_STUDIO_TOKEN` to set it."
+                "or environment variable `DATACHAIN_STUDIO_TOKEN` to set it."
             )
 
         return token
@@ -91,8 +112,8 @@ class StudioClient:
     @property
     def url(self) -> str:
         return (
-            os.environ.get("DVC_STUDIO_URL") or self.config.get("url") or STUDIO_URL
-        ) + "/api"
+            get_studio_env_variable("URL") or self.config.get("url") or STUDIO_URL
+        ).rstrip("/") + "/api"
 
     @property
     def config(self) -> dict:
@@ -107,13 +128,13 @@ class StudioClient:
         return self._team
 
     def _get_team(self) -> str:
-        team = os.environ.get("DVC_STUDIO_TEAM") or self.config.get("team")
+        team = get_studio_env_variable("TEAM") or self.config.get("team")
 
         if not team:
             raise DataChainError(
                 "Studio team is not set. "
                 "Use `datachain auth team <team_name>` "
-                "or environment variable `DVC_STUDIO_TEAM` to set it. "
+                "or environment variable `DATACHAIN_STUDIO_TEAM` to set it. "
                 "You can also set `studio.team` in the config file."
             )
 
@@ -291,13 +312,17 @@ class StudioClient:
     def edit_dataset(
         self,
         name: str,
+        namespace: str,
+        project: str,
         new_name: Optional[str] = None,
         description: Optional[str] = None,
         attrs: Optional[list[str]] = None,
     ) -> Response[DatasetInfoData]:
         body = {
             "new_name": new_name,
-            "dataset_name": name,
+            "name": name,
+            "namespace": namespace,
+            "project": project,
             "description": description,
             "attrs": attrs,
         }
@@ -310,44 +335,44 @@ class StudioClient:
     def rm_dataset(
         self,
         name: str,
+        namespace: str,
+        project: str,
         version: Optional[str] = None,
         force: Optional[bool] = False,
     ) -> Response[DatasetInfoData]:
         return self._send_request(
             "datachain/datasets",
             {
-                "dataset_name": name,
-                "dataset_version": version,
+                "name": name,
+                "namespace": namespace,
+                "project": project,
+                "version": version,
                 "force": force,
             },
             method="DELETE",
         )
 
-    def dataset_info(self, name: str) -> Response[DatasetInfoData]:
+    def dataset_info(
+        self, namespace: str, project: str, name: str
+    ) -> Response[DatasetInfoData]:
         def _parse_dataset_info(dataset_info):
             _parse_dates(dataset_info, ["created_at", "finished_at"])
             for version in dataset_info.get("versions"):
                 _parse_dates(version, ["created_at"])
+            _parse_dates(dataset_info.get("project"), ["created_at"])
+            _parse_dates(dataset_info.get("project").get("namespace"), ["created_at"])
 
             return dataset_info
 
         response = self._send_request(
-            "datachain/datasets/info", {"dataset_name": name}, method="GET"
+            "datachain/datasets/info",
+            {"namespace": namespace, "project": project, "name": name},
+            method="GET",
         )
         if response.ok:
             response.data = _parse_dataset_info(response.data)
         return response
 
-    def dataset_rows_chunk(
-        self, name: str, version: str, offset: int
-    ) -> Response[DatasetRowsData]:
-        req_data = {"dataset_name": name, "dataset_version": version}
-        return self._send_request_msgpack(
-            "datachain/datasets/rows",
-            {**req_data, "offset": offset, "limit": DATASET_ROWS_CHUNK_SIZE},
-            method="GET",
-        )
-
     def dataset_job_versions(self, job_id: str) -> Response[DatasetJobVersionsData]:
         return self._send_request(
             "datachain/datasets/dataset_job_versions",
@@ -356,20 +381,30 @@ class StudioClient:
         )
 
     def export_dataset_table(
-        self, name: str, version: str
+        self, dataset: DatasetRecord, version: str
     ) -> Response[DatasetExportSignedUrls]:
         return self._send_request(
             "datachain/datasets/export",
-            {"dataset_name": name, "dataset_version": version},
+            {
+                "namespace": dataset.project.namespace.name,
+                "project": dataset.project.name,
+                "name": dataset.name,
+                "version": version,
+            },
             method="GET",
         )
 
     def dataset_export_status(
-        self, name: str, version: str
+        self, dataset: DatasetRecord, version: str
     ) -> Response[DatasetExportStatus]:
         return self._send_request(
             "datachain/datasets/export-status",
-            {"dataset_name": name, "dataset_version": version},
+            {
+                "namespace": dataset.project.namespace.name,
+                "project": dataset.project.name,
+                "name": dataset.name,
+                "version": version,
+            },
             method="GET",
         )
 
@@ -392,7 +427,7 @@ class StudioClient:
         requirements: Optional[str] = None,
         repository: Optional[str] = None,
         priority: Optional[int] = None,
-        cluster_id: Optional[int] = None,
+        cluster: Optional[str] = None,
     ) -> Response[JobData]:
         data = {
             "query": query,
@@ -405,7 +440,7 @@ class StudioClient:
             "requirements": requirements,
             "repository": repository,
             "priority": priority,
-            "compute_cluster_id": cluster_id,
+            "compute_cluster_name": cluster,
         }
         return self._send_request("datachain/job", data)