datachain 0.16.5__py3-none-any.whl → 0.17.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of datachain might be problematic.

@@ -33,6 +33,7 @@ from datachain.cache import Cache
 from datachain.client import Client
 from datachain.dataset import (
     DATASET_PREFIX,
+    DEFAULT_DATASET_VERSION,
     QUERY_DATASET_PREFIX,
     DatasetDependency,
     DatasetListRecord,
@@ -154,9 +155,9 @@ class DatasetRowsFetcher(NodesThreadPool):
         metastore: "AbstractMetastore",
         warehouse: "AbstractWarehouse",
         remote_ds_name: str,
-        remote_ds_version: int,
+        remote_ds_version: str,
         local_ds_name: str,
-        local_ds_version: int,
+        local_ds_version: str,
         schema: dict[str, Union[SQLType, type[SQLType]]],
         max_threads: int = PULL_DATASET_MAX_THREADS,
         progress_bar=None,
@@ -286,7 +287,7 @@ class NodeGroup:
     # (not including the bucket name or s3:// prefix)
     source_path: str = ""
     dataset_name: Optional[str] = None
-    dataset_version: Optional[int] = None
+    dataset_version: Optional[str] = None
     instantiated_nodes: Optional[list[NodeWithPath]] = None

     @property
@@ -607,7 +608,7 @@ class Catalog:
         return lst, client, list_path

     def _remove_dataset_rows_and_warehouse_info(
-        self, dataset: DatasetRecord, version: int, **kwargs
+        self, dataset: DatasetRecord, version: str, **kwargs
     ):
         self.warehouse.drop_dataset_rows_table(dataset, version)
         self.update_dataset_version_with_warehouse_info(
@@ -767,7 +768,7 @@ class Catalog:
     def create_dataset(
         self,
         name: str,
-        version: Optional[int] = None,
+        version: Optional[str] = None,
         *,
         columns: Sequence[Column],
         feature_schema: Optional[dict] = None,
@@ -783,18 +784,17 @@ class Catalog:
         Creates new dataset of a specific version.
         If dataset is not yet created, it will create it with version 1
         If version is None, then next unused version is created.
-        If version is given, then it must be an unused version number.
+        If version is given, then it must be an unused version.
         """
         assert [c.name for c in columns if c.name != "sys__id"], f"got {columns=}"
         if not listing and Client.is_data_source_uri(name):
             raise RuntimeError(
                 "Cannot create dataset that starts with source prefix, e.g s3://"
             )
-        default_version = 1
+        default_version = DEFAULT_DATASET_VERSION
         try:
             dataset = self.get_dataset(name)
-            default_version = dataset.next_version
-
+            default_version = dataset.next_version_patch
             if (description or attrs) and (
                 dataset.description != description or dataset.attrs != attrs
             ):
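A note on the default-version logic above: it now operates on semver strings rather than integers. A brand-new dataset presumably starts at DEFAULT_DATASET_VERSION (the metastore column default later in this diff suggests "1.0.0"), and an existing dataset takes its next default from next_version_patch. A minimal sketch of that patch-bump behavior, with bump_patch as a hypothetical stand-in for whatever next_version_patch computes internally:

def bump_patch(version: str) -> str:
    # hypothetical helper: "1.2.3" -> "1.2.4"
    major, minor, patch = (int(part) for part in version.split("."))
    return f"{major}.{minor}.{patch + 1}"

DEFAULT_DATASET_VERSION = "1.0.0"  # assumed value, matching the new column default below

assert bump_patch("1.0.0") == "1.0.1"
assert bump_patch("2.3.9") == "2.3.10"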
@@ -846,7 +846,7 @@ class Catalog:
     def create_new_dataset_version(
         self,
         dataset: DatasetRecord,
-        version: int,
+        version: str,
         *,
         columns: Sequence[Column],
         sources="",
@@ -892,7 +892,7 @@ class Catalog:
         return dataset

     def update_dataset_version_with_warehouse_info(
-        self, dataset: DatasetRecord, version: int, rows_dropped=False, **kwargs
+        self, dataset: DatasetRecord, version: str, rows_dropped=False, **kwargs
     ) -> None:
         from datachain.query.dataset import DatasetQuery

@@ -959,7 +959,7 @@ class Catalog:
         return dataset

     def remove_dataset_version(
-        self, dataset: DatasetRecord, version: int, drop_rows: Optional[bool] = True
+        self, dataset: DatasetRecord, version: str, drop_rows: Optional[bool] = True
     ) -> None:
         """
         Deletes one single dataset version.
@@ -1037,82 +1037,11 @@ class Catalog:

         return self.get_dataset(name)

-    def register_dataset(
-        self,
-        dataset: DatasetRecord,
-        version: int,
-        target_dataset: DatasetRecord,
-        target_version: Optional[int] = None,
-    ) -> DatasetRecord:
-        """
-        Registers dataset version of one dataset as dataset version of another
-        one (it can be new version of existing one).
-        It also removes original dataset version
-        """
-        target_version = target_version or target_dataset.next_version
-
-        if not target_dataset.is_valid_next_version(target_version):
-            raise DatasetInvalidVersionError(
-                f"Version {target_version} must be higher than the current latest one"
-            )
-
-        dataset_version = dataset.get_version(version)
-        if not dataset_version:
-            raise DatasetVersionNotFoundError(
-                f"Dataset {dataset.name} does not have version {version}"
-            )
-
-        if not dataset_version.is_final_status():
-            raise ValueError("Cannot register dataset version in non final status")
-
-        # copy dataset version
-        target_dataset = self.metastore.create_dataset_version(
-            target_dataset,
-            target_version,
-            sources=dataset_version.sources,
-            status=dataset_version.status,
-            query_script=dataset_version.query_script,
-            error_message=dataset_version.error_message,
-            error_stack=dataset_version.error_stack,
-            script_output=dataset_version.script_output,
-            created_at=dataset_version.created_at,
-            finished_at=dataset_version.finished_at,
-            schema=dataset_version.serialized_schema,
-            num_objects=dataset_version.num_objects,
-            size=dataset_version.size,
-            preview=dataset_version.preview,
-            job_id=dataset_version.job_id,
-        )
-
-        # to avoid re-creating rows table, we are just renaming it for a new version
-        # of target dataset
-        self.warehouse.rename_dataset_table(
-            dataset.name,
-            target_dataset.name,
-            old_version=version,
-            new_version=target_version,
-        )
-        self.metastore.update_dataset_dependency_source(
-            dataset,
-            version,
-            new_source_dataset=target_dataset,
-            new_source_dataset_version=target_version,
-        )
-
-        if dataset.id == target_dataset.id:
-            # we are updating the same dataset so we need to refresh it to have newly
-            # added version in step before
-            dataset = self.get_dataset(dataset.name)
-
-        self.remove_dataset_version(dataset, version, drop_rows=False)
-
-        return self.get_dataset(target_dataset.name)
-
     def get_dataset(self, name: str) -> DatasetRecord:
         return self.metastore.get_dataset(name)

     def get_dataset_with_remote_fallback(
-        self, name: str, version: Optional[int] = None
+        self, name: str, version: Optional[str] = None
     ) -> DatasetRecord:
         try:
             ds = self.get_dataset(name)
@@ -1157,7 +1086,7 @@ class Catalog:
         return DatasetRecord.from_dict(dataset_info)

     def get_dataset_dependencies(
-        self, name: str, version: int, indirect=False
+        self, name: str, version: str, indirect=False
     ) -> list[Optional[DatasetDependency]]:
         dataset = self.get_dataset(name)

@@ -1175,7 +1104,7 @@ class Catalog:
             if d.is_dataset:
                 # only datasets can have dependencies
                 d.dependencies = self.get_dataset_dependencies(
-                    d.name, int(d.version), indirect=indirect
+                    d.name, d.version, indirect=indirect
                 )

         return direct_dependencies
@@ -1244,7 +1173,7 @@ class Catalog:
         ]

     def ls_dataset_rows(
-        self, name: str, version: int, offset=None, limit=None
+        self, name: str, version: str, offset=None, limit=None
     ) -> list[dict]:
         from datachain.query.dataset import DatasetQuery

@@ -1282,7 +1211,7 @@ class Catalog:
         self,
         bucket_uri: str,
         name: str,
-        version: int,
+        version: str,
         client_config=None,
     ) -> list[str]:
         dataset = self.get_dataset(name)
@@ -1291,14 +1220,14 @@ class Catalog:
             bucket_uri, dataset, version, client_config
         )

-    def dataset_table_export_file_names(self, name: str, version: int) -> list[str]:
+    def dataset_table_export_file_names(self, name: str, version: str) -> list[str]:
         dataset = self.get_dataset(name)
         return self.warehouse.dataset_table_export_file_names(dataset, version)

     def remove_dataset(
         self,
         name: str,
-        version: Optional[int] = None,
+        version: Optional[str] = None,
         force: Optional[bool] = False,
         studio: Optional[bool] = False,
     ):
@@ -1372,7 +1301,7 @@ class Catalog:
         remote_ds_uri: str,
         output: Optional[str] = None,
         local_ds_name: Optional[str] = None,
-        local_ds_version: Optional[int] = None,
+        local_ds_version: Optional[str] = None,
         cp: bool = False,
         force: bool = False,
         *,
@@ -127,7 +127,7 @@ def _datasets_tabulate_row(name, both, local_version, studio_version):
 def rm_dataset(
     catalog: "Catalog",
     name: str,
-    version: Optional[int] = None,
+    version: Optional[str] = None,
     force: Optional[bool] = False,
     studio: bool = False,
     local: bool = False,
@@ -10,7 +10,7 @@ if TYPE_CHECKING:
 def show(
     catalog: "Catalog",
     name: str,
-    version: Optional[int] = None,
+    version: Optional[str] = None,
     limit: int = 10,
     offset: int = 0,
     columns: Sequence[str] = (),
@@ -302,7 +302,7 @@ def get_parser() -> ArgumentParser:  # noqa: PLR0915
         "--version",
         action="store",
         default=None,
-        type=int,
+        type=str,
         help="Dataset version",
     )
     rm_dataset_parser.add_argument(
@@ -495,7 +495,7 @@ def get_parser() -> ArgumentParser:  # noqa: PLR0915
         "--version",
         action="store",
         default=None,
-        type=int,
+        type=str,
         help="Dataset version",
     )
     show_parser.add_argument("--schema", action="store_true", help="Show schema")
@@ -128,7 +128,7 @@ class AbstractMetastore(ABC, Serializable):
     def create_dataset_version(  # noqa: PLR0913
         self,
         dataset: DatasetRecord,
-        version: int,
+        version: str,
         status: int,
         sources: str = "",
         feature_schema: Optional[dict] = None,
@@ -158,13 +158,13 @@ class AbstractMetastore(ABC, Serializable):

     @abstractmethod
     def update_dataset_version(
-        self, dataset: DatasetRecord, version: int, **kwargs
+        self, dataset: DatasetRecord, version: str, **kwargs
     ) -> DatasetVersion:
         """Updates dataset version fields."""

     @abstractmethod
     def remove_dataset_version(
-        self, dataset: DatasetRecord, version: int
+        self, dataset: DatasetRecord, version: str
     ) -> DatasetRecord:
         """
         Deletes one single dataset version.
@@ -188,7 +188,7 @@ class AbstractMetastore(ABC, Serializable):
         self,
         dataset: DatasetRecord,
         status: int,
-        version: Optional[int] = None,
+        version: Optional[str] = None,
         error_message="",
         error_stack="",
         script_output="",
@@ -202,9 +202,9 @@ class AbstractMetastore(ABC, Serializable):
     def add_dataset_dependency(
         self,
         source_dataset_name: str,
-        source_dataset_version: int,
+        source_dataset_version: str,
         dataset_name: str,
-        dataset_version: int,
+        dataset_version: str,
     ) -> None:
         """Adds dataset dependency to dataset."""

@@ -212,21 +212,21 @@ class AbstractMetastore(ABC, Serializable):
     def update_dataset_dependency_source(
         self,
         source_dataset: DatasetRecord,
-        source_dataset_version: int,
+        source_dataset_version: str,
         new_source_dataset: Optional[DatasetRecord] = None,
-        new_source_dataset_version: Optional[int] = None,
+        new_source_dataset_version: Optional[str] = None,
     ) -> None:
         """Updates dataset dependency source."""

     @abstractmethod
     def get_direct_dataset_dependencies(
-        self, dataset: DatasetRecord, version: int
+        self, dataset: DatasetRecord, version: str
     ) -> list[Optional[DatasetDependency]]:
         """Gets direct dataset dependencies."""

     @abstractmethod
     def remove_dataset_dependencies(
-        self, dataset: DatasetRecord, version: Optional[int] = None
+        self, dataset: DatasetRecord, version: Optional[str] = None
     ) -> None:
         """
         When we remove dataset, we need to clean up it's dependencies as well.
@@ -234,7 +234,7 @@ class AbstractMetastore(ABC, Serializable):

     @abstractmethod
     def remove_dataset_dependants(
-        self, dataset: DatasetRecord, version: Optional[int] = None
+        self, dataset: DatasetRecord, version: Optional[str] = None
     ) -> None:
         """
         When we remove dataset, we need to clear its references in other dataset
@@ -370,7 +370,7 @@ class AbstractDBMetastore(AbstractMetastore):
                 ForeignKey(f"{cls.DATASET_TABLE}.id", ondelete="CASCADE"),
                 nullable=False,
             ),
-            Column("version", Integer, nullable=False),
+            Column("version", Text, nullable=False, default="1.0.0"),
             Column(
                 "status",
                 Integer,
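Since versions are now stored as Text (with a "1.0.0" default), plain string comparison no longer orders them correctly, which is presumably why comparisons elsewhere in this diff go through semver helpers. A small illustration of the pitfall in standalone Python, with semver_key as a hypothetical helper rather than datachain's own code:

versions = ["1.9.0", "1.10.0"]
print(max(versions))  # prints "1.9.0": lexicographic order, wrong for semver

def semver_key(version: str) -> tuple[int, int, int]:
    # split "MAJOR.MINOR.PATCH" into integers so comparisons are numeric
    major, minor, patch = (int(part) for part in version.split("."))
    return major, minor, patch

print(max(versions, key=semver_key))  # prints "1.10.0": correct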
@@ -554,7 +554,7 @@ class AbstractDBMetastore(AbstractMetastore):
     def create_dataset_version(  # noqa: PLR0913
         self,
         dataset: DatasetRecord,
-        version: int,
+        version: str,
         status: int,
         sources: str = "",
         feature_schema: Optional[dict] = None,
@@ -648,7 +648,7 @@ class AbstractDBMetastore(AbstractMetastore):
         return result_ds

     def update_dataset_version(
-        self, dataset: DatasetRecord, version: int, conn=None, **kwargs
+        self, dataset: DatasetRecord, version: str, conn=None, **kwargs
     ) -> DatasetVersion:
         """Updates dataset fields."""
         dataset_version = dataset.get_version(version)
@@ -758,7 +758,7 @@ class AbstractDBMetastore(AbstractMetastore):
         return ds

     def remove_dataset_version(
-        self, dataset: DatasetRecord, version: int
+        self, dataset: DatasetRecord, version: str
     ) -> DatasetRecord:
         """
         Deletes one single dataset version.
@@ -791,7 +791,7 @@ class AbstractDBMetastore(AbstractMetastore):
         self,
         dataset: DatasetRecord,
         status: int,
-        version: Optional[int] = None,
+        version: Optional[str] = None,
         error_message="",
         error_stack="",
         script_output="",
@@ -825,9 +825,9 @@ class AbstractDBMetastore(AbstractMetastore):
     def add_dataset_dependency(
         self,
         source_dataset_name: str,
-        source_dataset_version: int,
+        source_dataset_version: str,
         dataset_name: str,
-        dataset_version: int,
+        dataset_version: str,
     ) -> None:
         """Adds dataset dependency to dataset."""
         source_dataset = self.get_dataset(source_dataset_name)
@@ -847,9 +847,9 @@ class AbstractDBMetastore(AbstractMetastore):
     def update_dataset_dependency_source(
         self,
         source_dataset: DatasetRecord,
-        source_dataset_version: int,
+        source_dataset_version: str,
         new_source_dataset: Optional[DatasetRecord] = None,
-        new_source_dataset_version: Optional[int] = None,
+        new_source_dataset_version: Optional[str] = None,
     ) -> None:
         dd = self._datasets_dependencies

@@ -880,7 +880,7 @@ class AbstractDBMetastore(AbstractMetastore):
         """

     def get_direct_dataset_dependencies(
-        self, dataset: DatasetRecord, version: int
+        self, dataset: DatasetRecord, version: str
     ) -> list[Optional[DatasetDependency]]:
         d = self._datasets
         dd = self._datasets_dependencies
@@ -909,7 +909,7 @@ class AbstractDBMetastore(AbstractMetastore):
         return [self.dependency_class.parse(*r) for r in self.db.execute(query)]

     def remove_dataset_dependencies(
-        self, dataset: DatasetRecord, version: Optional[int] = None
+        self, dataset: DatasetRecord, version: Optional[str] = None
     ) -> None:
         """
         When we remove dataset, we need to clean up it's dependencies as well
@@ -928,7 +928,7 @@ class AbstractDBMetastore(AbstractMetastore):
         self.db.execute(q)

     def remove_dataset_dependants(
-        self, dataset: DatasetRecord, version: Optional[int] = None
+        self, dataset: DatasetRecord, version: Optional[str] = None
     ) -> None:
         """
         When we remove dataset, we need to clear its references in other dataset
@@ -25,6 +25,7 @@ from sqlalchemy.sql.selectable import Select
 from tqdm.auto import tqdm

 import datachain.sql.sqlite
+from datachain import semver
 from datachain.data_storage import AbstractDBMetastore, AbstractWarehouse
 from datachain.data_storage.db_engine import DatabaseEngine
 from datachain.data_storage.schema import DefaultSchema
@@ -486,7 +487,7 @@ class SQLiteWarehouse(AbstractWarehouse):
         return table

     def get_dataset_sources(
-        self, dataset: DatasetRecord, version: int
+        self, dataset: DatasetRecord, version: str
     ) -> list[StorageURI]:
         dr = self.dataset_rows(dataset, version)
         query = dr.select(dr.c("source", column="file")).distinct()
@@ -502,8 +503,8 @@ class SQLiteWarehouse(AbstractWarehouse):
         self,
         src: DatasetRecord,
         dst: DatasetRecord,
-        src_version: int,
-        dst_version: int,
+        src_version: str,
+        dst_version: str,
     ) -> None:
         dst_empty = False

@@ -534,7 +535,7 @@ class SQLiteWarehouse(AbstractWarehouse):
         dst_previous_versions = [
             v.version
             for v in dst.versions  # type: ignore [union-attr]
-            if v.version < dst_version
+            if semver.compare(v.version, dst_version) == -1
         ]
         if dst_previous_versions:
             dst_version_latest = max(dst_previous_versions)
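With string versions, the old filter v.version < dst_version is replaced by datachain.semver.compare, which, as used here, returns -1 when its first argument is the lower version (equal and greater presumably map to 0 and 1 in the usual cmp style, though this diff only relies on the -1 case). A hedged usage sketch:

from datachain import semver

existing = ["1.0.0", "1.9.0", "1.10.0"]
dst_version = "1.10.0"

# keep only versions strictly lower than dst_version, mirroring the filter above
previous = [v for v in existing if semver.compare(v, dst_version) == -1]
print(previous)  # expected: ["1.0.0", "1.9.0"]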
@@ -570,7 +571,7 @@ class SQLiteWarehouse(AbstractWarehouse):
                 conn=conn,
             )

-    def insert_dataset_rows(self, df, dataset: DatasetRecord, version: int) -> int:
+    def insert_dataset_rows(self, df, dataset: DatasetRecord, version: str) -> int:
         dr = self.dataset_rows(dataset, version)
         return self.db.insert_dataframe(dr.table.name, df)

@@ -595,7 +596,7 @@ class SQLiteWarehouse(AbstractWarehouse):
         return col_type.python_type

     def dataset_table_export_file_names(
-        self, dataset: DatasetRecord, version: int
+        self, dataset: DatasetRecord, version: str
     ) -> list[str]:
         raise NotImplementedError("Exporting dataset table not implemented for SQLite")

@@ -603,7 +604,7 @@ class SQLiteWarehouse(AbstractWarehouse):
         self,
         bucket_uri: str,
         dataset: DatasetRecord,
-        version: int,
+        version: str,
         client_config=None,
     ) -> list[str]:
         raise NotImplementedError("Exporting dataset table not implemented for SQLite")
@@ -176,7 +176,7 @@ class AbstractWarehouse(ABC, Serializable):
     def dataset_rows(
         self,
         dataset: DatasetRecord,
-        version: Optional[int] = None,
+        version: Optional[str] = None,
         column: str = "file",
     ):
         version = version or dataset.latest_version
@@ -253,7 +253,7 @@ class AbstractWarehouse(ABC, Serializable):
         name = parsed.path if parsed.scheme == "file" else parsed.netloc
         return parsed.scheme, name

-    def dataset_table_name(self, dataset_name: str, version: int) -> str:
+    def dataset_table_name(self, dataset_name: str, version: str) -> str:
         prefix = self.DATASET_TABLE_PREFIX
         if Client.is_data_source_uri(dataset_name):
             # for datasets that are created for bucket listing we use different prefix
@@ -282,7 +282,7 @@ class AbstractWarehouse(ABC, Serializable):
     def drop_dataset_rows_table(
         self,
         dataset: DatasetRecord,
-        version: int,
+        version: str,
         if_exists: bool = True,
     ) -> None:
         """Drops a dataset rows table for the given dataset name."""
@@ -295,8 +295,8 @@ class AbstractWarehouse(ABC, Serializable):
         self,
         src: "DatasetRecord",
         dst: "DatasetRecord",
-        src_version: int,
-        dst_version: int,
+        src_version: str,
+        dst_version: str,
     ) -> None:
         """
         Merges source dataset rows and current latest destination dataset rows
@@ -338,15 +338,15 @@ class AbstractWarehouse(ABC, Serializable):

     @abstractmethod
     def get_dataset_sources(
-        self, dataset: DatasetRecord, version: int
+        self, dataset: DatasetRecord, version: str
     ) -> list[StorageURI]: ...

     def rename_dataset_table(
         self,
         old_name: str,
         new_name: str,
-        old_version: int,
-        new_version: int,
+        old_version: str,
+        new_version: str,
     ) -> None:
         old_ds_table_name = self.dataset_table_name(old_name, old_version)
         new_ds_table_name = self.dataset_table_name(new_name, new_version)
@@ -362,7 +362,7 @@ class AbstractWarehouse(ABC, Serializable):
         return res[0]

     def dataset_stats(
-        self, dataset: DatasetRecord, version: int
+        self, dataset: DatasetRecord, version: str
     ) -> tuple[Optional[int], Optional[int]]:
         """
         Returns tuple with dataset stats: total number of rows and total dataset size.
@@ -399,7 +399,7 @@ class AbstractWarehouse(ABC, Serializable):
         """

     @abstractmethod
-    def insert_dataset_rows(self, df, dataset: DatasetRecord, version: int) -> int:
+    def insert_dataset_rows(self, df, dataset: DatasetRecord, version: str) -> int:
         """Inserts dataset rows directly into dataset table"""

     @abstractmethod
@@ -418,7 +418,7 @@ class AbstractWarehouse(ABC, Serializable):

     @abstractmethod
     def dataset_table_export_file_names(
-        self, dataset: DatasetRecord, version: int
+        self, dataset: DatasetRecord, version: str
     ) -> list[str]:
         """
         Returns list of file names that will be created when user runs dataset export
@@ -429,7 +429,7 @@ class AbstractWarehouse(ABC, Serializable):
         self,
         bucket_uri: str,
         dataset: DatasetRecord,
-        version: int,
+        version: str,
         client_config=None,
     ) -> list[str]:
         """