datachain 0.30.2__py3-none-any.whl → 0.30.4__py3-none-any.whl

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between these versions as they appear in their respective public registries.

Potentially problematic release: this version of datachain might be problematic.

datachain/__init__.py CHANGED
@@ -6,6 +6,7 @@ from datachain.lib.dc import (
     Sys,
     datasets,
     delete_dataset,
+    is_studio,
     listings,
     move_dataset,
     read_csv,
@@ -74,6 +75,7 @@ __all__ = [
     "datasets",
     "delete_dataset",
     "is_chain_type",
+    "is_studio",
     "listings",
     "metrics",
     "move_dataset",
@@ -3,6 +3,7 @@ from .catalog import (
     QUERY_SCRIPT_CANCELED_EXIT_CODE,
     QUERY_SCRIPT_INVALID_LAST_STATEMENT_EXIT_CODE,
     Catalog,
+    is_namespace_local,
 )
 from .loader import get_catalog
 
@@ -12,4 +13,5 @@ __all__ = [
     "QUERY_SCRIPT_INVALID_LAST_STATEMENT_EXIT_CODE",
     "Catalog",
     "get_catalog",
+    "is_namespace_local",
 ]
@@ -113,6 +113,11 @@ else:
     SIGINT = signal.SIGINT
 
 
+def is_namespace_local(namespace_name) -> bool:
+    """Checks if namespace is from local environment, i.e. is `local`"""
+    return namespace_name == "local"
+
+
 def shutdown_process(
     proc: subprocess.Popen,
     interrupt_timeout: Optional[int] = None,
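The new `is_namespace_local` helper replaces the metastore's `is_local_dataset` check, which is removed further down in this diff. A minimal sketch of its behavior, using the export added in the hunks above (the non-local namespace name is hypothetical):

```python
from datachain.catalog import is_namespace_local

# `local` is the namespace reserved for datasets created outside Studio;
# any other namespace is treated as potentially present in Studio.
assert is_namespace_local("local")
assert not is_namespace_local("my-team")  # hypothetical namespace name
```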
@@ -680,8 +685,9 @@ class Catalog:
         ds_namespace, ds_project, ds_name = parse_dataset_name(ds_name)
         assert ds_namespace
         assert ds_project
-        project = self.metastore.get_project(ds_project, ds_namespace)
-        dataset = self.get_dataset(ds_name, project)
+        dataset = self.get_dataset(
+            ds_name, namespace_name=ds_namespace, project_name=ds_project
+        )
         if not ds_version:
             ds_version = dataset.latest_version
         dataset_sources = self.warehouse.get_dataset_sources(
@@ -807,7 +813,11 @@ class Catalog:
         )
         default_version = DEFAULT_DATASET_VERSION
         try:
-            dataset = self.get_dataset(name, project)
+            dataset = self.get_dataset(
+                name,
+                namespace_name=project.namespace.name if project else None,
+                project_name=project.name if project else None,
+            )
             default_version = dataset.next_version_patch
             if update_version == "major":
                 default_version = dataset.next_version_major
@@ -1016,7 +1026,11 @@ class Catalog:
             dc.save(name)
         except Exception as e:  # noqa: BLE001
             try:
-                ds = self.get_dataset(name, project)
+                ds = self.get_dataset(
+                    name,
+                    namespace_name=project.namespace.name,
+                    project_name=project.name,
+                )
                 self.metastore.update_dataset_status(
                     ds,
                     DatasetStatus.FAILED,
@@ -1033,7 +1047,11 @@ class Catalog:
         except DatasetNotFoundError:
             raise e from None
 
-        ds = self.get_dataset(name, project)
+        ds = self.get_dataset(
+            name,
+            namespace_name=project.namespace.name,
+            project_name=project.name,
+        )
 
         self.update_dataset_version_with_warehouse_info(
             ds,
@@ -1041,7 +1059,11 @@ class Catalog:
             sources="\n".join(sources),
         )
 
-        return self.get_dataset(name, project)
+        return self.get_dataset(
+            name,
+            namespace_name=project.namespace.name,
+            project_name=project.name,
+        )
 
     def get_full_dataset_name(
         self,
@@ -1077,22 +1099,23 @@ class Catalog:
         return namespace_name, project_name, name
 
     def get_dataset(
-        self, name: str, project: Optional[Project] = None
+        self,
+        name: str,
+        namespace_name: Optional[str] = None,
+        project_name: Optional[str] = None,
     ) -> DatasetRecord:
         from datachain.lib.listing import is_listing_dataset
 
-        project = project or self.metastore.default_project
+        namespace_name = namespace_name or self.metastore.default_namespace_name
+        project_name = project_name or self.metastore.default_project_name
 
         if is_listing_dataset(name):
-            project = self.metastore.listing_project
+            namespace_name = self.metastore.system_namespace_name
+            project_name = self.metastore.listing_project_name
 
-        try:
-            return self.metastore.get_dataset(name, project.id if project else None)
-        except DatasetNotFoundError:
-            raise DatasetNotFoundError(
-                f"Dataset {name} not found in namespace {project.namespace.name}"
-                f" and project {project.name}"
-            ) from None
+        return self.metastore.get_dataset(
+            name, namespace_name=namespace_name, project_name=project_name
+        )
 
     def get_dataset_with_remote_fallback(
         self,
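`Catalog.get_dataset` now takes plain namespace and project names instead of a `Project` object, and both fall back to the metastore defaults when omitted. A hedged usage sketch (the dataset, namespace, and project names are hypothetical):

```python
from datachain.catalog import get_catalog

catalog = get_catalog()

# Old shape: catalog.get_dataset("my_dataset", project)  # Project object
# New shape: keyword names; omitted names use the metastore defaults.
ds = catalog.get_dataset(
    "my_dataset",
    namespace_name="local",
    project_name="my_project",
)
print(ds.latest_version)
```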
@@ -1103,6 +1126,8 @@ class Catalog:
         pull_dataset: bool = False,
         update: bool = False,
     ) -> DatasetRecord:
+        from datachain.lib.dc.utils import is_studio
+
         # Intentionally ignore the update flag if a version is provided. Here only
         # an exact version can be provided, and update then doesn't make sense.
         # It corresponds to a query like this for example:
@@ -1111,16 +1136,24 @@ class Catalog:
         if version:
             update = False
 
-        if self.metastore.is_local_dataset(namespace_name) or not update:
+        # we don't do Studio fallback if the script is already running in Studio,
+        # or if we try to fetch a dataset with the local namespace, as that one
+        # cannot exist in Studio in the first place
+        no_fallback = is_studio() or is_namespace_local(namespace_name)
+
+        if no_fallback or not update:
             try:
-                project = self.metastore.get_project(project_name, namespace_name)
-                ds = self.get_dataset(name, project)
+                ds = self.get_dataset(
+                    name,
+                    namespace_name=namespace_name,
+                    project_name=project_name,
+                )
                 if not version or ds.has_version(version):
                     return ds
             except (NamespaceNotFoundError, ProjectNotFoundError, DatasetNotFoundError):
                 pass
 
-        if self.metastore.is_local_dataset(namespace_name):
+        if no_fallback:
            raise DatasetNotFoundError(
                f"Dataset {name}"
                + (f" version {version} " if version else " ")
@@ -1139,7 +1172,9 @@ class Catalog:
                 local_ds_version=version,
             )
             return self.get_dataset(
-                name, self.metastore.get_project(project_name, namespace_name)
+                name,
+                namespace_name=namespace_name,
+                project_name=project_name,
             )
 
         return self.get_remote_dataset(namespace_name, project_name, name)
@@ -1148,7 +1183,11 @@ class Catalog:
         """Returns dataset that contains version with specific uuid"""
         for dataset in self.ls_datasets():
             if dataset.has_version_with_uuid(uuid):
-                return self.get_dataset(dataset.name, dataset.project)
+                return self.get_dataset(
+                    dataset.name,
+                    namespace_name=dataset.project.namespace.name,
+                    project_name=dataset.project.name,
+                )
         raise DatasetNotFoundError(f"Dataset with version uuid {uuid} not found.")
 
     def get_remote_dataset(
@@ -1171,9 +1210,18 @@ class Catalog:
         return DatasetRecord.from_dict(dataset_info)
 
     def get_dataset_dependencies(
-        self, name: str, version: str, project: Optional[Project] = None, indirect=False
+        self,
+        name: str,
+        version: str,
+        namespace_name: Optional[str] = None,
+        project_name: Optional[str] = None,
+        indirect=False,
     ) -> list[Optional[DatasetDependency]]:
-        dataset = self.get_dataset(name, project)
+        dataset = self.get_dataset(
+            name,
+            namespace_name=namespace_name,
+            project_name=project_name,
+        )
 
         direct_dependencies = self.metastore.get_direct_dataset_dependencies(
             dataset, version
@@ -1187,10 +1235,13 @@ class Catalog:
                 # dependency has been removed
                 continue
             if d.is_dataset:
-                project = self.metastore.get_project(d.project, d.namespace)
                 # only datasets can have dependencies
                 d.dependencies = self.get_dataset_dependencies(
-                    d.name, d.version, project, indirect=indirect
+                    d.name,
+                    d.version,
+                    namespace_name=d.namespace,
+                    project_name=d.project,
+                    indirect=indirect,
                 )
 
         return direct_dependencies
@@ -1340,7 +1391,11 @@ class Catalog:
         project: Optional[Project] = None,
         client_config=None,
     ) -> list[str]:
-        dataset = self.get_dataset(name, project)
+        dataset = self.get_dataset(
+            name,
+            namespace_name=project.namespace.name if project else None,
+            project_name=project.name if project else None,
+        )
 
         return self.warehouse.export_dataset_table(
             bucket_uri, dataset, version, client_config
@@ -1349,7 +1404,11 @@ class Catalog:
     def dataset_table_export_file_names(
         self, name: str, version: str, project: Optional[Project] = None
     ) -> list[str]:
-        dataset = self.get_dataset(name, project)
+        dataset = self.get_dataset(
+            name,
+            namespace_name=project.namespace.name if project else None,
+            project_name=project.name if project else None,
+        )
         return self.warehouse.dataset_table_export_file_names(dataset, version)
 
     def remove_dataset(
@@ -1359,7 +1418,11 @@ class Catalog:
         version: Optional[str] = None,
         force: Optional[bool] = False,
     ):
-        dataset = self.get_dataset(name, project)
+        dataset = self.get_dataset(
+            name,
+            namespace_name=project.namespace.name if project else None,
+            project_name=project.name if project else None,
+        )
         if not version and not force:
             raise ValueError(f"Missing dataset version from input for dataset {name}")
         if version and not dataset.has_version(version):
@@ -1395,7 +1458,11 @@ class Catalog:
         if attrs is not None:
             update_data["attrs"] = attrs  # type: ignore[assignment]
 
-        dataset = self.get_dataset(name, project)
+        dataset = self.get_dataset(
+            name,
+            namespace_name=project.namespace.name if project else None,
+            project_name=project.name if project else None,
+        )
         return self.update_dataset(dataset, **update_data)
 
     def ls(
@@ -1549,7 +1616,9 @@ class Catalog:
         )
 
         try:
-            local_dataset = self.get_dataset(local_ds_name, project=project)
+            local_dataset = self.get_dataset(
+                local_ds_name, namespace_name=namespace.name, project_name=project.name
+            )
             if local_dataset and local_dataset.has_version(local_ds_version):
                 raise DataChainError(
                     f"Local dataset {local_ds_uri} already exists with different uuid,"
@@ -127,7 +127,8 @@ def get_udf_distributor_class() -> Optional[type["AbstractUDFDistributor"]]:
 
 
 def get_catalog(
-    client_config: Optional[dict[str, Any]] = None, in_memory: bool = False
+    client_config: Optional[dict[str, Any]] = None,
+    in_memory: bool = False,
 ) -> "Catalog":
     """
     Function that creates Catalog instance with appropriate metastore
@@ -142,8 +143,9 @@ def get_catalog(
     """
     from datachain.catalog import Catalog
 
+    metastore = get_metastore(in_memory=in_memory)
     return Catalog(
-        metastore=get_metastore(in_memory=in_memory),
+        metastore=metastore,
         warehouse=get_warehouse(in_memory=in_memory),
         client_config=client_config,
         in_memory=in_memory,
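`get_catalog` keeps its public signature; the change only hoists the metastore construction into a local variable. Usage is unchanged, e.g.:

```python
from datachain.catalog import get_catalog

# In-memory metastore/warehouse, convenient for tests; client_config stays optional.
catalog = get_catalog(in_memory=True)
```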
datachain/cli/__init__.py CHANGED
@@ -6,6 +6,7 @@ from multiprocessing import freeze_support
 from typing import Optional
 
 from datachain.cli.utils import get_logging_level
+from datachain.error import DataChainError as DataChainError
 
 from .commands import (
     clear_cache,
@@ -6,6 +6,7 @@ from tabulate import tabulate
 if TYPE_CHECKING:
     from datachain.catalog import Catalog
 
+from datachain.catalog import is_namespace_local
 from datachain.cli.utils import determine_flavors
 from datachain.config import Config
 from datachain.error import DataChainError, DatasetNotFoundError
@@ -107,8 +108,9 @@ def list_datasets_local(catalog: "Catalog", name: Optional[str] = None):
 def list_datasets_local_versions(catalog: "Catalog", name: str):
     namespace_name, project_name, name = catalog.get_full_dataset_name(name)
 
-    project = catalog.metastore.get_project(project_name, namespace_name)
-    ds = catalog.get_dataset(name, project)
+    ds = catalog.get_dataset(
+        name, namespace_name=namespace_name, project_name=project_name
+    )
     for v in ds.versions:
         yield (name, v.version)
 
@@ -137,15 +139,18 @@ def rm_dataset(
 ):
     namespace_name, project_name, name = catalog.get_full_dataset_name(name)
 
-    if not catalog.metastore.is_local_dataset(namespace_name) and studio:
+    if studio:
+        # removing Studio dataset from CLI
         from datachain.studio import remove_studio_dataset
 
-        token = Config().read().get("studio", {}).get("token")
-        if not token:
+        if Config().read().get("studio", {}).get("token"):
+            remove_studio_dataset(
+                team, name, namespace_name, project_name, version, force
+            )
+        else:
             raise DataChainError(
                 "Not logged in to Studio. Log in with 'datachain auth login'."
             )
-        remove_studio_dataset(team, name, namespace_name, project_name, version, force)
     else:
         try:
             project = catalog.metastore.get_project(project_name, namespace_name)
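Both `rm_dataset` here and `edit_dataset` in the next hunk now use the same token-gated shape, with the success path first. Roughly (the elided call stands for the respective Studio operation):

```python
from datachain.config import Config
from datachain.error import DataChainError

if Config().read().get("studio", {}).get("token"):
    ...  # perform the Studio operation (remove/edit)
else:
    raise DataChainError(
        "Not logged in to Studio. Log in with 'datachain auth login'."
    )
```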
@@ -162,9 +167,11 @@ def edit_dataset(
     attrs: Optional[list[str]] = None,
     team: Optional[str] = None,
 ):
+    from datachain.lib.dc.utils import is_studio
+
     namespace_name, project_name, name = catalog.get_full_dataset_name(name)
 
-    if catalog.metastore.is_local_dataset(namespace_name):
+    if is_studio() or is_namespace_local(namespace_name):
         try:
             catalog.edit_dataset(
                 name, catalog.metastore.default_project, new_name, description, attrs
@@ -174,11 +181,11 @@ def edit_dataset(
     else:
         from datachain.studio import edit_studio_dataset
 
-        token = Config().read().get("studio", {}).get("token")
-        if not token:
+        if Config().read().get("studio", {}).get("token"):
+            edit_studio_dataset(
+                team, name, namespace_name, project_name, new_name, description, attrs
+            )
+        else:
             raise DataChainError(
                 "Not logged in to Studio. Log in with 'datachain auth login'."
             )
-        edit_studio_dataset(
-            team, name, namespace_name, project_name, new_name, description, attrs
-        )
@@ -145,23 +145,6 @@ class AbstractMetastore(ABC, Serializable):
     def list_namespaces(self, conn=None) -> list[Namespace]:
         """Gets a list of all namespaces"""
 
-    @property
-    @abstractmethod
-    def is_studio(self) -> bool:
-        """Returns True if this code is ran in Studio"""
-
-    def is_local_dataset(self, dataset_namespace: str) -> bool:
-        """
-        Returns True if this is local dataset i.e. not pulled from Studio but
-        created locally. This is False if we ran code in CLI mode but using dataset
-        names that are present in Studio.
-        """
-        return self.is_studio or dataset_namespace == Namespace.default()
-
-    @property
-    def namespace_allowed_to_create(self):
-        return self.is_studio
-
     #
     # Projects
     #
@@ -215,10 +198,6 @@ class AbstractMetastore(ABC, Serializable):
     def list_projects(self, namespace_id: Optional[int], conn=None) -> list[Project]:
         """Gets list of projects in some namespace or in general (in all namespaces)"""
 
-    @property
-    def project_allowed_to_create(self):
-        return self.is_studio
-
     #
     # Datasets
     #
@@ -301,7 +280,13 @@ class AbstractMetastore(ABC, Serializable):
         """
 
     @abstractmethod
-    def get_dataset(self, name: str, project_id: Optional[int] = None) -> DatasetRecord:
+    def get_dataset(
+        self,
+        name: str,  # normal, not full dataset name
+        namespace_name: Optional[str] = None,
+        project_name: Optional[str] = None,
+        conn=None,
+    ) -> DatasetRecord:
         """Gets a single dataset by name."""
 
     @abstractmethod
@@ -912,11 +897,14 @@ class AbstractDBMetastore(AbstractMetastore):
         **kwargs,  # TODO registered = True / False
     ) -> DatasetRecord:
         """Creates new dataset."""
-        project_id = project_id or self.default_project.id
+        if not project_id:
+            project = self.default_project
+        else:
+            project = self.get_project_by_id(project_id)
 
         query = self._datasets_insert().values(
             name=name,
-            project_id=project_id,
+            project_id=project.id,
             status=status,
             feature_schema=json.dumps(feature_schema or {}),
             created_at=datetime.now(timezone.utc),
@@ -935,7 +923,9 @@ class AbstractDBMetastore(AbstractMetastore):
         query = query.on_conflict_do_nothing(index_elements=["project_id", "name"])
         self.db.execute(query)
 
-        return self.get_dataset(name, project_id)
+        return self.get_dataset(
+            name, namespace_name=project.namespace.name, project_name=project.name
+        )
 
     def create_dataset_version(  # noqa: PLR0913
         self,
@@ -992,7 +982,12 @@ class AbstractDBMetastore(AbstractMetastore):
         )
         self.db.execute(query, conn=conn)
 
-        return self.get_dataset(dataset.name, dataset.project.id, conn=conn)
+        return self.get_dataset(
+            dataset.name,
+            namespace_name=dataset.project.namespace.name,
+            project_name=dataset.project.name,
+            conn=conn,
+        )
 
     def remove_dataset(self, dataset: DatasetRecord) -> None:
         """Removes dataset."""
@@ -1216,21 +1211,30 @@ class AbstractDBMetastore(AbstractMetastore):
     def get_dataset(
         self,
         name: str,  # normal, not full dataset name
-        project_id: Optional[int] = None,
+        namespace_name: Optional[str] = None,
+        project_name: Optional[str] = None,
         conn=None,
     ) -> DatasetRecord:
         """
         Gets a single dataset in project by dataset name.
         """
-        project_id = project_id or self.default_project.id
+        namespace_name = namespace_name or self.default_namespace_name
+        project_name = project_name or self.default_project_name
 
         d = self._datasets
+        n = self._namespaces
+        p = self._projects
         query = self._base_dataset_query()
-        query = query.where(d.c.name == name, d.c.project_id == project_id)  # type: ignore [attr-defined]
+        query = query.where(
+            d.c.name == name,
+            n.c.name == namespace_name,
+            p.c.name == project_name,
+        )  # type: ignore [attr-defined]
         ds = self._parse_dataset(self.db.execute(query, conn=conn))
         if not ds:
             raise DatasetNotFoundError(
-                f"Dataset {name} not found in project with id {project_id}"
+                f"Dataset {name} not found in namespace {namespace_name}"
+                f" and project {project_name}"
             )
 
         return ds
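Metastore lookups, and the error message, are now keyed by names rather than a project id. A hedged sketch, assuming `metastore` is an `AbstractDBMetastore` instance and the dataset/namespace/project names are hypothetical:

```python
from datachain.error import DatasetNotFoundError

try:
    ds = metastore.get_dataset(
        "cats", namespace_name="local", project_name="analytics"
    )
except DatasetNotFoundError as e:
    # e.g. "Dataset cats not found in namespace local and project analytics"
    print(e)
```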
@@ -542,10 +542,6 @@ class SQLiteMetastore(AbstractDBMetastore):
     def _jobs_insert(self) -> "Insert":
         return sqlite.insert(self._jobs)
 
-    @property
-    def is_studio(self) -> bool:
-        return False
-
     #
     # Namespaces
     #
datachain/delta.py CHANGED
@@ -77,7 +77,8 @@ def _get_delta_chain(
 
 
 def _get_retry_chain(
     name: str,
-    project: Project,
+    namespace_name: str,
+    project_name: str,
     latest_version: str,
     source_ds_name: str,
     source_ds_project: Project,
@@ -96,8 +97,8 @@ def _get_retry_chain(
     # Read the latest version of the result dataset for retry logic
     result_dataset = datachain.read_dataset(
         name,
-        namespace=project.namespace.name,
-        project=project.name,
+        namespace=namespace_name,
+        project=project_name,
         version=latest_version,
     )
     source_dc = datachain.read_dataset(
@@ -128,7 +129,8 @@ def _get_retry_chain(
 
 
 def _get_source_info(
     name: str,
-    project: Project,
+    namespace_name: str,
+    project_name: str,
     latest_version: str,
     catalog,
 ) -> tuple[
@@ -145,7 +147,11 @@ def _get_source_info(
     Returns (None, None, None, None) if source dataset was removed.
     """
     dependencies = catalog.get_dataset_dependencies(
-        name, latest_version, project=project, indirect=False
+        name,
+        latest_version,
+        namespace_name=namespace_name,
+        project_name=project_name,
+        indirect=False,
     )
 
     dep = dependencies[0]
@@ -157,7 +163,9 @@ def _get_source_info(
     source_ds_name = dep.name
     source_ds_version = dep.version
     source_ds_latest_version = catalog.get_dataset(
-        source_ds_name, project=source_ds_project
+        source_ds_name,
+        namespace_name=source_ds_project.namespace.name,
+        project_name=source_ds_project.name,
    ).latest_version
 
    return (
@@ -211,12 +219,14 @@ def delta_retry_update(
     """
 
     catalog = dc.session.catalog
-    project = catalog.metastore.get_project(project_name, namespace_name)
+    # project = catalog.metastore.get_project(project_name, namespace_name)
     dc._query.apply_listing_pre_step()
 
     # Check if dataset exists
     try:
-        dataset = catalog.get_dataset(name, project=project)
+        dataset = catalog.get_dataset(
+            name, namespace_name=namespace_name, project_name=project_name
+        )
         latest_version = dataset.latest_version
     except DatasetNotFoundError:
         # First creation of result dataset
@@ -234,7 +244,7 @@ def delta_retry_update(
         source_ds_version,
         source_ds_latest_version,
         dependencies,
-    ) = _get_source_info(name, project, latest_version, catalog)
+    ) = _get_source_info(name, namespace_name, project_name, latest_version, catalog)
 
     # If source_ds_name is None, starting dataset was removed
     if source_ds_name is None:
@@ -264,7 +274,8 @@ def delta_retry_update(
     if delta_retry:
         retry_chain = _get_retry_chain(
             name,
-            project,
+            namespace_name,
+            project_name,
             latest_version,
             source_ds_name,
             source_ds_project,
@@ -290,8 +301,8 @@ def delta_retry_update(
 
     latest_dataset = datachain.read_dataset(
         name,
-        namespace=project.namespace.name,
-        project=project.name,
+        namespace=namespace_name,
+        project=project_name,
         version=latest_version,
     )
     compared_chain = latest_dataset.diff(
datachain/func/string.py CHANGED
@@ -6,6 +6,14 @@ from datachain.sql.functions import string
 
 from .func import ColT, Func
 
+__all__ = [
+    "byte_hamming_distance",
+    "length",
+    "regexp_replace",
+    "replace",
+    "split",
+]
+
 
 def length(col: ColT) -> Func:
     """
@@ -9,7 +9,7 @@ from .pandas import read_pandas
 from .parquet import read_parquet
 from .records import read_records
 from .storage import read_storage
-from .utils import DatasetMergeError, DatasetPrepareError, Sys
+from .utils import DatasetMergeError, DatasetPrepareError, Sys, is_studio
 from .values import read_values
 
 __all__ = [
@@ -21,6 +21,7 @@ __all__ = [
     "Sys",
     "datasets",
     "delete_dataset",
+    "is_studio",
     "listings",
     "move_dataset",
     "read_csv",