datachain 0.16.5__py3-none-any.whl → 0.17.0__py3-none-any.whl

This diff represents the changes between publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.

datachain/dataset.py CHANGED
@@ -12,6 +12,7 @@ from typing import (
 )
 from urllib.parse import urlparse
 
+from datachain import semver
 from datachain.error import DatasetVersionNotFoundError
 from datachain.sql.types import NAME_TYPES_MAPPING, SQLType
 
@@ -25,6 +26,8 @@ DATASET_PREFIX = "ds://"
 QUERY_DATASET_PREFIX = "ds_query_"
 LISTING_PREFIX = "lst__"
 
+DEFAULT_DATASET_VERSION = "1.0.0"
+
 
 # StorageURI represents a normalised URI to a valid storage location (full bucket or
 # absolute local path).
@@ -33,12 +36,12 @@ LISTING_PREFIX = "lst__"
 StorageURI = NewType("StorageURI", str)
 
 
-def parse_dataset_uri(uri: str) -> tuple[str, Optional[int]]:
+def parse_dataset_uri(uri: str) -> tuple[str, Optional[str]]:
     """
     Parse dataser uri to extract name and version out of it (if version is defined)
    Example:
-        Input: ds://zalando@v3
-        Output: (zalando, 3)
+        Input: ds://zalando@v3.0.1
+        Output: (zalando, 3.0.1)
    """
    p = urlparse(uri)
    if p.scheme != "ds":
@@ -51,16 +54,15 @@ def parse_dataset_uri(uri: str) -> tuple[str, Optional[int]]:
         raise Exception(
             "Wrong dataset uri format, it should be: ds://<name>@v<version>"
         )
-    version = int(s[1])
-    return name, version
+    return name, s[1]
 
 
-def create_dataset_uri(name: str, version: Optional[int] = None) -> str:
+def create_dataset_uri(name: str, version: Optional[str] = None) -> str:
     """
     Creates a dataset uri based on dataset name and optionally version
    Example:
-        Input: zalando, 3
-        Output: ds//zalando@v3
+        Input: zalando, 3.0.1
+        Output: ds//zalando@v3.0.1
    """
    uri = f"{DATASET_PREFIX}{name}"
    if version:
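Taken together, the URI helpers now pass the semver string through unchanged instead of casting it to an int. A small sketch of the new round trip (values are illustrative):

    name, version = parse_dataset_uri("ds://zalando@v3.0.1")  # ("zalando", "3.0.1")
    uri = create_dataset_uri("zalando", "3.0.1")              # "ds://zalando@v3.0.1"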
@@ -79,7 +81,7 @@ class DatasetDependency:
     id: int
     type: str
     name: str
-    version: str  # TODO change to int
+    version: str
     created_at: datetime
     dependencies: list[Optional["DatasetDependency"]]
 
@@ -102,7 +104,7 @@ class DatasetDependency:
         dataset_id: Optional[int],
         dataset_version_id: Optional[int],
         dataset_name: Optional[str],
-        dataset_version: Optional[int],
+        dataset_version: Optional[str],
         dataset_version_created_at: Optional[datetime],
     ) -> Optional["DatasetDependency"]:
         from datachain.client import Client
@@ -124,7 +126,7 @@ class DatasetDependency:
             dependency_type,
             dependency_name,
             (
-                str(dataset_version)  # type: ignore[arg-type]
+                dataset_version  # type: ignore[arg-type]
                 if dataset_version
                 else None
             ),
@@ -163,7 +165,7 @@ class DatasetVersion:
     id: int
     uuid: str
     dataset_id: int
-    version: int
+    version: str
     status: int
     feature_schema: dict
     created_at: datetime
@@ -185,7 +187,7 @@ class DatasetVersion:
         id: int,
         uuid: str,
         dataset_id: int,
-        version: int,
+        version: str,
         status: int,
         feature_schema: Optional[str],
         created_at: datetime,
@@ -222,6 +224,10 @@ class DatasetVersion:
             job_id,
         )
 
+    @property
+    def version_value(self) -> int:
+        return semver.value(self.version)
+
     def __eq__(self, other):
         if not isinstance(other, DatasetVersion):
             return False
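The new datachain.semver helper module itself is not part of this diff. A minimal sketch consistent with how it is called throughout (parse, create, validate, and value); the actual implementation in the release may differ:

    import re

    SEMVER_RE = re.compile(r"^\d+\.\d+\.\d+$")

    def validate(version: str) -> None:
        # reject anything that is not a plain MAJOR.MINOR.PATCH string
        if not SEMVER_RE.match(version):
            raise ValueError(f"Invalid version {version}, expected MAJOR.MINOR.PATCH")

    def parse(version: str) -> tuple[int, int, int]:
        # split "2.4.0" into its integer components (2, 4, 0)
        validate(version)
        major, minor, patch = (int(part) for part in version.split("."))
        return major, minor, patch

    def create(major: int = 0, minor: int = 0, patch: int = 0) -> str:
        return f"{major}.{minor}.{patch}"

    def value(version: str) -> int:
        # collapse a version into one integer so that version_value ordering
        # matches semver ordering (assuming each component stays below 1000)
        major, minor, patch = parse(version)
        return major * 1_000_000 + minor * 1_000 + patch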
@@ -230,7 +236,7 @@ class DatasetVersion:
     def __lt__(self, other):
         if not isinstance(other, DatasetVersion):
             return False
-        return self.version < other.version
+        return self.version_value < other.version_value
 
     def __hash__(self):
         return hash(f"{self.dataset_id}_{self.version}")
@@ -275,7 +281,7 @@ class DatasetListVersion:
     id: int
     uuid: str
     dataset_id: int
-    version: int
+    version: str
     status: int
     created_at: datetime
     finished_at: Optional[datetime]
@@ -292,7 +298,7 @@ class DatasetListVersion:
         id: int,
         uuid: str,
         dataset_id: int,
-        version: int,
+        version: str,
         status: int,
         created_at: datetime,
         finished_at: Optional[datetime],
@@ -323,6 +329,10 @@ class DatasetListVersion:
     def __hash__(self):
         return hash(f"{self.dataset_id}_{self.version}")
 
+    @property
+    def version_value(self) -> int:
+        return semver.value(self.version)
+
 
 @dataclass
 class DatasetRecord:
@@ -371,7 +381,7 @@ class DatasetRecord:
         version_id: int,
         version_uuid: str,
         version_dataset_id: int,
-        version: int,
+        version: str,
         version_status: int,
         version_feature_schema: Optional[str],
         version_created_at: datetime,
@@ -441,7 +451,7 @@ class DatasetRecord:
             for c_name, c_type in self.schema.items()
         }
 
-    def get_schema(self, version: int) -> dict[str, Union[SQLType, type[SQLType]]]:
+    def get_schema(self, version: str) -> dict[str, Union[SQLType, type[SQLType]]]:
         return self.get_version(version).schema if version else self.schema
 
     def update(self, **kwargs):
@@ -460,20 +470,23 @@ class DatasetRecord:
             self.versions = []
 
         self.versions = list(set(self.versions + other.versions))
-        self.versions.sort(key=lambda v: v.version)
+        self.versions.sort(key=lambda v: v.version_value)
         return self
 
-    def has_version(self, version: int) -> bool:
-        return version in self.versions_values
+    def has_version(self, version: str) -> bool:
+        return version in [v.version for v in self.versions]
 
-    def is_valid_next_version(self, version: int) -> bool:
+    def is_valid_next_version(self, version: str) -> bool:
         """
         Checks if a number can be a valid next latest version for dataset.
         The only rule is that it cannot be lower than current latest version
         """
-        return not (self.latest_version and self.latest_version >= version)
+        return not (
+            self.latest_version
+            and semver.value(self.latest_version) >= semver.value(version)
+        )
 
-    def get_version(self, version: int) -> DatasetVersion:
+    def get_version(self, version: str) -> DatasetVersion:
         if not self.has_version(version):
             raise DatasetVersionNotFoundError(
                 f"Dataset {self.name} does not have version {version}"
@@ -496,15 +509,15 @@ class DatasetRecord:
             f"Dataset {self.name} does not have version with uuid {uuid}"
         ) from None
 
-    def remove_version(self, version: int) -> None:
+    def remove_version(self, version: str) -> None:
         if not self.versions or not self.has_version(version):
             return
 
         self.versions = [v for v in self.versions if v.version != version]
 
-    def identifier(self, version: int) -> str:
+    def identifier(self, version: str) -> str:
         """
-        Get identifier in the form my-dataset@v3
+        Get identifier in the form my-dataset@v3.0.1
         """
         if not self.has_version(version):
             raise DatasetVersionNotFoundError(
@@ -512,43 +525,73 @@ class DatasetRecord:
             )
         return f"{self.name}@v{version}"
 
-    def uri(self, version: int) -> str:
+    def uri(self, version: str) -> str:
         """
-        Dataset uri example: ds://dogs@v3
+        Dataset uri example: ds://dogs@v3.0.1
         """
         identifier = self.identifier(version)
         return f"{DATASET_PREFIX}{identifier}"
 
     @property
-    def versions_values(self) -> list[int]:
+    def next_version_major(self) -> str:
         """
-        Extracts actual versions from list of DatasetVersion objects
-        in self.versions attribute
+        Returns the next auto-incremented version if the major part is being bumped.
         """
         if not self.versions:
-            return []
+            return "1.0.0"
 
-        return sorted(v.version for v in self.versions)
+        major, minor, patch = semver.parse(self.latest_version)
+        return semver.create(major + 1, 0, 0)
 
     @property
-    def next_version(self) -> int:
-        """Returns what should be next autoincrement version of dataset"""
+    def next_version_minor(self) -> str:
+        """
+        Returns the next auto-incremented version if the minor part is being bumped.
+        """
         if not self.versions:
-            return 1
-        return max(self.versions_values) + 1
+            return "1.0.0"
+
+        major, minor, patch = semver.parse(self.latest_version)
+        return semver.create(major, minor + 1, 0)
 
     @property
-    def latest_version(self) -> int:
+    def next_version_patch(self) -> str:
+        """
+        Returns the next auto-incremented version if the patch part is being bumped.
+        """
+        if not self.versions:
+            return "1.0.0"
+
+        major, minor, patch = semver.parse(self.latest_version)
+        return semver.create(major, minor, patch + 1)
+
+    @property
+    def latest_version(self) -> str:
         """Returns latest version of a dataset"""
-        return max(self.versions_values)
+        return max(self.versions).version
+
+    def latest_major_version(self, major: int) -> Optional[str]:
+        """
+        Returns latest specific major version, e.g if dataset has versions:
+            - 1.4.1
+            - 2.0.1
+            - 2.1.1
+            - 2.4.0
+        and we call `.latest_major_version(2)` it will return: "2.4.0".
+        If no major version is find with input value, None will be returned
+        """
+        versions = [v for v in self.versions if semver.parse(v.version)[0] == major]
+        if not versions:
+            return None
+        return max(versions).version
 
     @property
-    def prev_version(self) -> Optional[int]:
+    def prev_version(self) -> Optional[str]:
         """Returns previous version of a dataset"""
         if len(self.versions) == 1:
             return None
 
-        return sorted(self.versions_values)[-2]
+        return sorted(self.versions)[-2].version
 
     @classmethod
     def from_dict(cls, d: dict[str, Any]) -> "DatasetRecord":
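For the same hypothetical record with latest version "2.4.0", the new bump helpers and the major-version lookup behave roughly as follows:

    record.next_version_major        # "3.0.0"
    record.next_version_minor        # "2.5.0"
    record.next_version_patch        # "2.4.1"
    record.latest_major_version(2)   # "2.4.0" (highest 2.x.y version)
    record.latest_major_version(5)   # None (no 5.x.y versions exist)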
@@ -577,7 +620,7 @@ class DatasetListRecord:
     version_id: int,
     version_uuid: str,
     version_dataset_id: int,
-    version: int,
+    version: str,
     version_status: int,
     version_created_at: datetime,
     version_finished_at: Optional[datetime],
@@ -626,11 +669,11 @@ class DatasetListRecord:
             self.versions = []
 
         self.versions = list(set(self.versions + other.versions))
-        self.versions.sort(key=lambda v: v.version)
+        self.versions.sort(key=lambda v: v.version_value)
         return self
 
     def latest_version(self) -> DatasetListVersion:
-        return max(self.versions, key=lambda v: v.version)
+        return max(self.versions, key=lambda v: v.version_value)
 
     @property
     def is_bucket_listing(self) -> bool:
datachain/lib/dataset_info.py CHANGED
@@ -6,6 +6,7 @@ from uuid import uuid4
 from pydantic import Field, field_validator
 
 from datachain.dataset import (
+    DEFAULT_DATASET_VERSION,
     DatasetListRecord,
     DatasetListVersion,
     DatasetStatus,
@@ -22,7 +23,7 @@ if TYPE_CHECKING:
 class DatasetInfo(DataModel):
     name: str
     uuid: str = Field(default=str(uuid4()))
-    version: int = Field(default=1)
+    version: str = Field(default=DEFAULT_DATASET_VERSION)
     status: int = Field(default=DatasetStatus.CREATED)
     created_at: datetime = Field(default=TIME_ZERO)
     finished_at: Optional[datetime] = Field(default=None)
datachain/lib/dc/datachain.py CHANGED
@@ -23,6 +23,7 @@ import sqlalchemy
 from pydantic import BaseModel
 from tqdm import tqdm
 
+from datachain import semver
 from datachain.dataset import DatasetRecord
 from datachain.func import literal
 from datachain.func.base import Function
@@ -214,7 +215,7 @@ class DataChain:
         return self._query.name
 
     @property
-    def version(self) -> Optional[int]:
+    def version(self) -> Optional[str]:
         """Version of the underlying dataset, if there is one."""
         return self._query.version
 
@@ -457,7 +458,7 @@ class DataChain:
     def save(  # type: ignore[override]
         self,
         name: str,
-        version: Optional[int] = None,
+        version: Optional[str] = None,
         description: Optional[str] = None,
         attrs: Optional[list[str]] = None,
         **kwargs,
@@ -466,11 +467,15 @@ class DataChain:
 
         Parameters:
             name : dataset name.
-            version : version of a dataset. Default - the last version that exist.
+            version : version of a dataset. If version is not specified and dataset
+                already exists, version patch increment will happen e.g 1.2.1 -> 1.2.2.
             description : description of a dataset.
             attrs : attributes of a dataset. They can be without value, e.g "NLP",
                 or with a value, e.g "location=US".
         """
+        if version is not None:
+            semver.validate(version)
+
         schema = self.signals_schema.clone_without_sys_signals().serialize()
         return self._evolve(
             query=self._query.save(
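In practice save() now validates any explicit version up front and, per the docstring, bumps the patch component when none is given. A hedged sketch (dataset name and resulting versions are illustrative):

    chain.save("cats")                   # first save, presumably stored as "1.0.0"
    chain.save("cats")                   # dataset exists: patch bump, e.g. "1.0.0" -> "1.0.1"
    chain.save("cats", version="2.0.0")  # explicit semver string, validated first
    chain.save("cats", version="2")      # rejected by semver.validate: not MAJOR.MINOR.PATCH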
datachain/lib/dc/datasets.py CHANGED
@@ -1,5 +1,6 @@
-from typing import TYPE_CHECKING, Optional, get_origin, get_type_hints
+from typing import TYPE_CHECKING, Optional, Union, get_origin, get_type_hints
 
+from datachain.error import DatasetVersionNotFoundError
 from datachain.lib.dataset_info import DatasetInfo
 from datachain.lib.file import (
     File,
@@ -22,7 +23,7 @@ if TYPE_CHECKING:
 
 def read_dataset(
     name: str,
-    version: Optional[int] = None,
+    version: Optional[Union[str, int]] = None,
     session: Optional[Session] = None,
     settings: Optional[dict] = None,
     fallback_to_studio: bool = True,
@@ -49,7 +50,7 @@ def read_dataset(
     ```
 
     ```py
-    chain = dc.read_dataset("my_cats", version=1)
+    chain = dc.read_dataset("my_cats", version="1.0.0")
     ```
 
     ```py
@@ -63,7 +64,7 @@ def read_dataset(
     }
     chain = dc.read_dataset(
         name="my_cats",
-        version=1,
+        version="1.0.0",
         session=session,
         settings=settings,
         fallback_to_studio=True,
@@ -74,9 +75,29 @@ def read_dataset(
 
     from .datachain import DataChain
 
+    if version is not None:
+        try:
+            # for backward compatibility we still allow users to put version as integer
+            # in which case we are trying to find latest version where major part is
+            # equal to that input version. For example if user sets version=2, we could
+            # continue with something like 2.4.3 (assuming 2.4.3 is the biggest among
+            # all 2.* dataset versions). If dataset doesn't have any versions where
+            # major part is equal to that input, exception is thrown.
+            major = int(version)
+            dataset = Session.get(session).catalog.get_dataset(name)
+            latest_major = dataset.latest_major_version(major)
+            if not latest_major:
+                raise DatasetVersionNotFoundError(
+                    f"Dataset {name} does not have version {version}"
+                )
+            version = latest_major
+        except ValueError:
+            # version is in new semver string format, continuing as normal
+            pass
+
     query = DatasetQuery(
         name=name,
-        version=version,
+        version=version,  # type: ignore[arg-type]
         session=session,
         indexing_column_types=File._datachain_column_types,
         fallback_to_studio=fallback_to_studio,
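The backward-compatibility branch means an integer version now selects the newest release within that major line. Assuming a dataset whose versions include "2.0.1" and "2.4.3":

    dc.read_dataset("my_cats", version="2.0.1")  # exact semver match
    dc.read_dataset("my_cats", version=2)        # resolves to "2.4.3", the latest 2.x.y
    dc.read_dataset("my_cats", version=5)        # raises DatasetVersionNotFoundError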
@@ -179,7 +200,7 @@ def datasets(
 
 def delete_dataset(
     name: str,
-    version: Optional[int] = None,
+    version: Optional[str] = None,
     force: Optional[bool] = False,
     studio: Optional[bool] = False,
     session: Optional[Session] = None,
@@ -207,7 +228,7 @@ def delete_dataset(
 
     ```py
     import datachain as dc
-    dc.delete_dataset("cats", version=1)
+    dc.delete_dataset("cats", version="1.0.0")
     ```
     """
 
datachain/lib/dc/storage.py CHANGED
@@ -5,6 +5,7 @@ from typing import (
     Union,
 )
 
+from datachain.error import DatasetNotFoundError
 from datachain.lib.file import (
     FileType,
     get_file_type,
@@ -97,7 +98,8 @@ def read_storage(
     if anon:
         client_config = (client_config or {}) | {"anon": True}
     session = Session.get(session, client_config=client_config, in_memory=in_memory)
-    cache = session.catalog.cache
+    catalog = session.catalog
+    cache = catalog.cache
     client_config = session.catalog.client_config
 
     uris = uri if isinstance(uri, (list, tuple)) else [uri]
@@ -130,6 +132,11 @@ def read_storage(
 
     def lst_fn(ds_name, lst_uri):
         # disable prefetch for listing, as it pre-downloads all files
+        try:
+            version = catalog.get_dataset(ds_name).next_version_major
+        except DatasetNotFoundError:
+            version = None
+
         (
             read_records(
                 DataChain.DEFAULT_FILE_RECORD,
@@ -142,7 +149,8 @@ def read_storage(
                 list_bucket(lst_uri, cache, client_config=client_config),
                 output={f"{column}": file_type},
             )
-            .save(ds_name, listing=True)
+            # for internal listing datasets, we always bump major version
+            .save(ds_name, listing=True, version=version)
         )
 
     dc._query.set_listing_fn(
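The effect is that every re-listing of a storage location rewrites the internal listing dataset under a new major version: when the listing dataset does not exist yet, version stays None and save() falls back to the default first version; otherwise the major part is bumped. A rough sketch, assuming the first listing lands on "1.0.0" and that read_storage() accepts an update flag to force re-listing:

    dc.read_storage("s3://my-bucket/")               # listing dataset saved as "1.0.0"
    dc.read_storage("s3://my-bucket/", update=True)  # re-listed and saved as "2.0.0"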
datachain/lib/pytorch.py CHANGED
@@ -43,7 +43,7 @@ class PytorchDataset(IterableDataset):
     def __init__(
         self,
         name: str,
-        version: Optional[int] = None,
+        version: Optional[str] = None,
         catalog: Optional["Catalog"] = None,
         transform: Optional["Transform"] = None,
         tokenizer: Optional[Callable] = None,
@@ -60,7 +60,7 @@ class PytorchDataset(IterableDataset):
 
         Args:
             name (str): Name of DataChain dataset to stream.
-            version (int): Version of DataChain dataset to stream.
+            version (str): Version of DataChain dataset to stream.
             catalog (Catalog): DataChain catalog to which dataset belongs.
             transform (Transform): Torchvision transforms to apply to the dataset.
             tokenizer (Callable): Tokenizer to use to tokenize text values.
datachain/listing.py CHANGED
@@ -26,7 +26,7 @@ class Listing:
         warehouse: "AbstractWarehouse",
         client: "Client",
         dataset_name: Optional["str"] = None,
-        dataset_version: Optional[int] = None,
+        dataset_version: Optional[str] = None,
         column: str = "file",
     ):
         self.metastore = metastore
datachain/query/dataset.py CHANGED
@@ -83,7 +83,7 @@ PartitionByType = Union[
     Function, ColumnElement, Sequence[Union[Function, ColumnElement]]
 ]
 JoinPredicateType = Union[str, ColumnClause, ColumnElement]
-DatasetDependencyType = tuple[str, int]
+DatasetDependencyType = tuple[str, str]
 
 logger = logging.getLogger("datachain")
 
@@ -168,7 +168,7 @@ class Step(ABC):
 class QueryStep:
     catalog: "Catalog"
     dataset_name: str
-    dataset_version: int
+    dataset_version: str
 
     def apply(self):
         def q(*columns):
@@ -1092,7 +1092,7 @@ class DatasetQuery:
     def __init__(
         self,
         name: str,
-        version: Optional[int] = None,
+        version: Optional[str] = None,
         catalog: Optional["Catalog"] = None,
         session: Optional[Session] = None,
         indexing_column_types: Optional[dict[str, Any]] = None,
@@ -1112,7 +1112,7 @@ class DatasetQuery:
         self.table = self.get_table()
         self.starting_step: Optional[QueryStep] = None
         self.name: Optional[str] = None
-        self.version: Optional[int] = None
+        self.version: Optional[str] = None
         self.feature_schema: Optional[dict] = None
         self.column_types: Optional[dict[str, Any]] = None
         self.before_steps: list[Callable] = []
@@ -1155,7 +1155,7 @@ class DatasetQuery:
     def __or__(self, other):
         return self.union(other)
 
-    def pull_dataset(self, name: str, version: Optional[int] = None) -> "DatasetRecord":
+    def pull_dataset(self, name: str, version: Optional[str] = None) -> "DatasetRecord":
         print("Dataset not found in local catalog, trying to get from studio")
 
         remote_ds_uri = f"{DATASET_PREFIX}{name}"
@@ -1185,8 +1185,8 @@ class DatasetQuery:
         it completely. If this is the case, name and version of underlying dataset
         will be defined.
         DatasetQuery instance can become attached in two scenarios:
-            1. ds = DatasetQuery(name="dogs", version=1) -> ds is attached to dogs
-            2. ds = ds.save("dogs", version=1) -> ds is attached to dogs dataset
+            1. ds = DatasetQuery(name="dogs", version="1.0.0") -> ds is attached to dogs
+            2. ds = ds.save("dogs", version="1.0.0") -> ds is attached to dogs dataset
         It can move to detached state if filter or similar methods are called on it,
         as then it no longer 100% represents underlying datasets.
         """
@@ -1663,7 +1663,7 @@ class DatasetQuery:
         )
         return query
 
-    def _add_dependencies(self, dataset: "DatasetRecord", version: int):
+    def _add_dependencies(self, dataset: "DatasetRecord", version: str):
         for dependency in self.dependencies:
             ds_dependency_name, ds_dependency_version = dependency
             self.catalog.metastore.add_dataset_dependency(
@@ -1685,7 +1685,7 @@ class DatasetQuery:
     def save(
         self,
         name: Optional[str] = None,
-        version: Optional[int] = None,
+        version: Optional[str] = None,
         feature_schema: Optional[dict] = None,
         description: Optional[str] = None,
         attrs: Optional[list[str]] = None,
datachain/query/session.py CHANGED
@@ -69,7 +69,7 @@ class Session:
         self.catalog = catalog or get_catalog(
             client_config=client_config, in_memory=in_memory
         )
-        self.dataset_versions: list[tuple[DatasetRecord, int, bool]] = []
+        self.dataset_versions: list[tuple[DatasetRecord, str, bool]] = []
 
     def __enter__(self):
         # Push the current context onto the stack
@@ -90,7 +90,7 @@ class Session:
         Session.SESSION_CONTEXTS.pop()
 
     def add_dataset_version(
-        self, dataset: "DatasetRecord", version: int, listing: bool = False
+        self, dataset: "DatasetRecord", version: str, listing: bool = False
     ) -> None:
         self.dataset_versions.append((dataset, version, listing))
 
datachain/remote/studio.py CHANGED
@@ -307,7 +307,7 @@ class StudioClient:
     def rm_dataset(
         self,
         name: str,
-        version: Optional[int] = None,
+        version: Optional[str] = None,
         force: Optional[bool] = False,
     ) -> Response[DatasetInfoData]:
         return self._send_request(
@@ -336,7 +336,7 @@ class StudioClient:
         return response
 
     def dataset_rows_chunk(
-        self, name: str, version: int, offset: int
+        self, name: str, version: str, offset: int
     ) -> Response[DatasetRowsData]:
         req_data = {"dataset_name": name, "dataset_version": version}
         return self._send_request_msgpack(
@@ -353,7 +353,7 @@ class StudioClient:
         )
 
     def export_dataset_table(
-        self, name: str, version: int
+        self, name: str, version: str
     ) -> Response[DatasetExportSignedUrls]:
         return self._send_request(
             "datachain/datasets/export",
@@ -362,7 +362,7 @@ class StudioClient:
         )
 
     def dataset_export_status(
-        self, name: str, version: int
+        self, name: str, version: str
     ) -> Response[DatasetExportStatus]:
         return self._send_request(
             "datachain/datasets/export-status",