datachain 0.8.13__py3-none-any.whl → 0.9.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

datachain/__init__.py CHANGED
@@ -4,9 +4,14 @@ from datachain.lib.file import (
     ArrowRow,
     File,
     FileError,
+    Image,
     ImageFile,
     TarVFile,
     TextFile,
+    Video,
+    VideoFile,
+    VideoFragment,
+    VideoFrame,
 )
 from datachain.lib.model_store import ModelStore
 from datachain.lib.udf import Aggregator, Generator, Mapper
@@ -27,6 +32,7 @@ __all__ = [
     "File",
     "FileError",
     "Generator",
+    "Image",
     "ImageFile",
     "Mapper",
     "ModelStore",
@@ -34,6 +40,10 @@ __all__ = [
     "Sys",
     "TarVFile",
     "TextFile",
+    "Video",
+    "VideoFile",
+    "VideoFragment",
+    "VideoFrame",
     "is_chain_type",
     "metrics",
     "param",
@@ -89,10 +89,6 @@ PULL_DATASET_SLEEP_INTERVAL = 0.1 # sleep time while waiting for chunk to be av
 PULL_DATASET_CHECK_STATUS_INTERVAL = 20  # interval to check export status in Studio


-def raise_remote_error(error_message: str) -> NoReturn:
-    raise DataChainError(f"Error from server: {error_message}")
-
-
 def noop(_: str):
     pass

@@ -211,14 +207,14 @@ class DatasetRowsFetcher(NodesThreadPool):
             self.remote_ds_name, self.remote_ds_version
         )
         if not export_status_response.ok:
-            raise_remote_error(export_status_response.message)
+            raise DataChainError(export_status_response.message)

         export_status = export_status_response.data["status"]  # type: ignore [index]

         if export_status == "failed":
-            raise_remote_error("Dataset export failed in Studio")
+            raise DataChainError("Dataset export failed in Studio")
         if export_status == "removed":
-            raise_remote_error("Dataset export removed in Studio")
+            raise DataChainError("Dataset export removed in Studio")

         self.last_status_check = time.time()

@@ -1101,6 +1097,31 @@ class Catalog:
     def get_dataset(self, name: str) -> DatasetRecord:
         return self.metastore.get_dataset(name)

+    def get_dataset_with_remote_fallback(
+        self, name: str, version: Optional[int] = None
+    ) -> DatasetRecord:
+        try:
+            ds = self.get_dataset(name)
+            if version and not ds.has_version(version):
+                raise DatasetVersionNotFoundError(
+                    f"Dataset {name} does not have version {version}"
+                )
+            return ds
+
+        except (DatasetNotFoundError, DatasetVersionNotFoundError):
+            print("Dataset not found in local catalog, trying to get from studio")
+
+            remote_ds_uri = f"{DATASET_PREFIX}{name}"
+            if version:
+                remote_ds_uri += f"@v{version}"
+
+            self.pull_dataset(
+                remote_ds_uri=remote_ds_uri,
+                local_ds_name=name,
+                local_ds_version=version,
+            )
+            return self.get_dataset(name)
+
     def get_dataset_with_version_uuid(self, uuid: str) -> DatasetRecord:
         """Returns dataset that contains version with specific uuid"""
         for dataset in self.ls_datasets():
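
A hedged usage sketch of the new fallback helper (not part of the diff); the dataset name and version are made up, and `catalog` is assumed to be an already-constructed Catalog instance obtained elsewhere:

# Sketch only: `catalog` is an assumed, existing Catalog instance.
# If "my-dataset" (or the requested version) is missing locally, the method
# pulls it from Studio and then returns the local DatasetRecord.
ds = catalog.get_dataset_with_remote_fallback("my-dataset", version=2)
print(ds.name, [v.version for v in ds.versions])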
@@ -1113,7 +1134,7 @@ class Catalog:

         info_response = studio_client.dataset_info(name)
         if not info_response.ok:
-            raise_remote_error(info_response.message)
+            raise DataChainError(info_response.message)

         dataset_info = info_response.data
         assert isinstance(dataset_info, dict)
@@ -1209,6 +1230,8 @@ class Catalog:
         **kwargs,
     ) -> str:
         client_config = client_config or self.client_config
+        if client_config.get("anon"):
+            content_disposition = None
         client = Client.get_client(source, self.cache, **client_config)
         return client.url(
             path,
@@ -1407,7 +1430,7 @@ class Catalog:
             remote_ds_name, remote_ds_version.version
         )
         if not export_response.ok:
-            raise_remote_error(export_response.message)
+            raise DataChainError(export_response.message)

         signed_urls = export_response.data

datachain/cli/__init__.py CHANGED
@@ -160,6 +160,8 @@ def handle_dataset_command(args, catalog):
             local=args.local,
             all=args.all,
             team=args.team,
+            latest_only=not args.versions,
+            name=args.name,
         ),
         "rm": lambda: rm_dataset(
             catalog,
@@ -12,49 +12,115 @@ from datachain.error import DatasetNotFoundError
 from datachain.studio import list_datasets as list_datasets_studio


+def group_dataset_versions(datasets, latest_only=True):
+    grouped = {}
+    # Sort to ensure groupby works as expected
+    # (groupby expects consecutive items with the same key)
+    for name, version in sorted(datasets):
+        grouped.setdefault(name, []).append(version)
+
+    if latest_only:
+        # For each dataset name, pick the highest version.
+        return {name: max(versions) for name, versions in grouped.items()}
+    # For each dataset name, return a sorted list of unique versions.
+    return {name: sorted(set(versions)) for name, versions in grouped.items()}
+
+
 def list_datasets(
     catalog: "Catalog",
     studio: bool = False,
     local: bool = False,
     all: bool = True,
     team: Optional[str] = None,
+    latest_only: bool = True,
+    name: Optional[str] = None,
 ):
     token = Config().read().get("studio", {}).get("token")
     all, local, studio = determine_flavors(studio, local, all, token)
+    if name:
+        latest_only = False

-    local_datasets = set(list_datasets_local(catalog)) if all or local else set()
+    local_datasets = set(list_datasets_local(catalog, name)) if all or local else set()
     studio_datasets = (
-        set(list_datasets_studio(team=team)) if (all or studio) and token else set()
+        set(list_datasets_studio(team=team, name=name))
+        if (all or studio) and token
+        else set()
     )

+    # Group the datasets for both local and studio sources.
+    local_grouped = group_dataset_versions(local_datasets, latest_only)
+    studio_grouped = group_dataset_versions(studio_datasets, latest_only)
+
+    # Merge all dataset names from both sources.
+    all_dataset_names = sorted(set(local_grouped.keys()) | set(studio_grouped.keys()))
+
+    datasets = []
+    if latest_only:
+        # For each dataset name, get the latest version from each source (if available).
+        for n in all_dataset_names:
+            datasets.append((n, (local_grouped.get(n), studio_grouped.get(n))))
+    else:
+        # For each dataset name, merge all versions from both sources.
+        for n in all_dataset_names:
+            local_versions = local_grouped.get(n, [])
+            studio_versions = studio_grouped.get(n, [])
+
+            # If neither source has any versions, record it as (None, None).
+            if not local_versions and not studio_versions:
+                datasets.append((n, (None, None)))
+            else:
+                # For each unique version from either source, record its presence.
+                for version in sorted(set(local_versions) | set(studio_versions)):
+                    datasets.append(
+                        (
+                            n,
+                            (
+                                version if version in local_versions else None,
+                                version if version in studio_versions else None,
+                            ),
+                        )
+                    )
+
     rows = [
         _datasets_tabulate_row(
-            name=name,
-            version=version,
+            name=n,
             both=(all or (local and studio)) and token,
-            local=(name, version) in local_datasets,
-            studio=(name, version) in studio_datasets,
+            local_version=local_version,
+            studio_version=studio_version,
         )
-        for name, version in local_datasets.union(studio_datasets)
+        for n, (local_version, studio_version) in datasets
     ]

     print(tabulate(rows, headers="keys"))


-def list_datasets_local(catalog: "Catalog"):
+def list_datasets_local(catalog: "Catalog", name: Optional[str] = None):
+    if name:
+        yield from list_datasets_local_versions(catalog, name)
+        return
+
     for d in catalog.ls_datasets():
         for v in d.versions:
             yield (d.name, v.version)


-def _datasets_tabulate_row(name, version, both, local, studio):
+def list_datasets_local_versions(catalog: "Catalog", name: str):
+    ds = catalog.get_dataset(name)
+    for v in ds.versions:
+        yield (name, v.version)
+
+
+def _datasets_tabulate_row(name, both, local_version, studio_version):
     row = {
         "Name": name,
-        "Version": version,
     }
     if both:
-        row["Studio"] = "\u2714" if studio else "\u2716"
-        row["Local"] = "\u2714" if local else "\u2716"
+        row["Studio"] = f"v{studio_version}" if studio_version else "\u2716"
+        row["Local"] = f"v{local_version}" if local_version else "\u2716"
+    else:
+        latest_version = local_version or studio_version
+        row["Latest Version"] = f"v{latest_version}" if latest_version else "\u2716"
+
     return row

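To make the grouping behaviour above concrete, here is a small self-contained sketch that reproduces group_dataset_versions from this hunk; the sample dataset names are made up:

# Self-contained sketch: copy of group_dataset_versions from the hunk above,
# exercised with made-up (name, version) pairs to show the two output shapes.
def group_dataset_versions(datasets, latest_only=True):
    grouped = {}
    for name, version in sorted(datasets):
        grouped.setdefault(name, []).append(version)
    if latest_only:
        return {name: max(versions) for name, versions in grouped.items()}
    return {name: sorted(set(versions)) for name, versions in grouped.items()}

pairs = {("cats", 1), ("cats", 3), ("dogs", 1)}
print(group_dataset_versions(pairs))                     # {'cats': 3, 'dogs': 1}
print(group_dataset_versions(pairs, latest_only=False))  # {'cats': [1, 3], 'dogs': [1]}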
 
@@ -1,5 +1,4 @@
 import argparse
-from argparse import ArgumentParser
 from importlib.metadata import PackageNotFoundError, version

 import shtab
@@ -10,12 +9,16 @@ from .job import add_jobs_parser
 from .studio import add_auth_parser
 from .utils import (
     FIND_COLUMNS,
+    CustomHelpFormatter,
     add_anon_arg,
     add_show_args,
     add_sources_arg,
     add_update_arg,
     find_columns_type,
 )
+from .utils import (
+    CustomArgumentParser as ArgumentParser,
+)


 def get_parser() -> ArgumentParser:  # noqa: PLR0915
@@ -28,10 +31,11 @@ def get_parser() -> ArgumentParser:  # noqa: PLR0915
     parser = ArgumentParser(
         description="DataChain: Wrangle unstructured AI data at scale.",
         prog="datachain",
+        formatter_class=CustomHelpFormatter,
     )
     parser.add_argument("-V", "--version", action="version", version=__version__)

-    parent_parser = ArgumentParser(add_help=False)
+    parent_parser = ArgumentParser(add_help=False, formatter_class=CustomHelpFormatter)
     parent_parser.add_argument(
         "-v", "--verbose", action="count", default=0, help="Be verbose"
     )
@@ -59,7 +63,10 @@ def get_parser() -> ArgumentParser:  # noqa: PLR0915
         help=f"Use `{parser.prog} command --help` for command-specific help",
     )
     parse_cp = subp.add_parser(
-        "cp", parents=[parent_parser], description="Copy data files from the cloud."
+        "cp",
+        parents=[parent_parser],
+        description="Copy data files from the cloud.",
+        formatter_class=CustomHelpFormatter,
     )
     add_sources_arg(parse_cp).complete = shtab.DIR  # type: ignore[attr-defined]
     parse_cp.add_argument(
@@ -90,7 +97,10 @@ def get_parser() -> ArgumentParser:  # noqa: PLR0915
     add_update_arg(parse_cp)

     parse_clone = subp.add_parser(
-        "clone", parents=[parent_parser], description="Copy data files from the cloud."
+        "clone",
+        parents=[parent_parser],
+        description="Copy data files from the cloud.",
+        formatter_class=CustomHelpFormatter,
     )
     add_sources_arg(parse_clone).complete = shtab.DIR  # type: ignore[attr-defined]
     parse_clone.add_argument(
@@ -134,6 +144,7 @@ def get_parser() -> ArgumentParser:  # noqa: PLR0915
         aliases=["ds"],
         parents=[parent_parser],
         description="Commands for managing datasets.",
+        formatter_class=CustomHelpFormatter,
     )
     add_anon_arg(datasets_parser)
     datasets_subparser = datasets_parser.add_subparsers(
@@ -145,6 +156,7 @@ def get_parser() -> ArgumentParser:  # noqa: PLR0915
         "pull",
         parents=[parent_parser],
         description="Pull specific dataset version from Studio.",
+        formatter_class=CustomHelpFormatter,
     )
     parse_pull.add_argument(
         "dataset",
@@ -188,7 +200,10 @@ def get_parser() -> ArgumentParser:  # noqa: PLR0915
     )

     parse_edit_dataset = datasets_subparser.add_parser(
-        "edit", parents=[parent_parser], description="Edit dataset metadata."
+        "edit",
+        parents=[parent_parser],
+        description="Edit dataset metadata.",
+        formatter_class=CustomHelpFormatter,
     )
     parse_edit_dataset.add_argument("name", type=str, help="Dataset name")
     parse_edit_dataset.add_argument(
@@ -234,7 +249,19 @@ def get_parser() -> ArgumentParser:  # noqa: PLR0915
     )

     datasets_ls_parser = datasets_subparser.add_parser(
-        "ls", parents=[parent_parser], description="List datasets."
+        "ls",
+        parents=[parent_parser],
+        description="List datasets.",
+        formatter_class=CustomHelpFormatter,
+    )
+    datasets_ls_parser.add_argument(
+        "name", action="store", help="Name of the dataset to list", nargs="?"
+    )
+    datasets_ls_parser.add_argument(
+        "--versions",
+        action="store_true",
+        default=False,
+        help="List all the versions of each dataset",
     )
     datasets_ls_parser.add_argument(
         "--studio",
@@ -264,7 +291,11 @@ def get_parser() -> ArgumentParser:  # noqa: PLR0915
     )

     rm_dataset_parser = datasets_subparser.add_parser(
-        "rm", parents=[parent_parser], description="Remove dataset.", aliases=["remove"]
+        "rm",
+        parents=[parent_parser],
+        description="Remove dataset.",
+        aliases=["remove"],
+        formatter_class=CustomHelpFormatter,
     )
     rm_dataset_parser.add_argument("name", type=str, help="Dataset name")
     rm_dataset_parser.add_argument(
@@ -308,7 +339,10 @@ def get_parser() -> ArgumentParser:  # noqa: PLR0915
     )

     parse_ls = subp.add_parser(
-        "ls", parents=[parent_parser], description="List storage contents."
+        "ls",
+        parents=[parent_parser],
+        description="List storage contents.",
+        formatter_class=CustomHelpFormatter,
     )
     add_anon_arg(parse_ls)
     add_update_arg(parse_ls)
@@ -348,7 +382,10 @@ def get_parser() -> ArgumentParser:  # noqa: PLR0915
     )

     parse_du = subp.add_parser(
-        "du", parents=[parent_parser], description="Display space usage."
+        "du",
+        parents=[parent_parser],
+        description="Display space usage.",
+        formatter_class=CustomHelpFormatter,
     )
     add_sources_arg(parse_du)
     add_anon_arg(parse_du)
@@ -380,7 +417,10 @@ def get_parser() -> ArgumentParser:  # noqa: PLR0915
     )

     parse_find = subp.add_parser(
-        "find", parents=[parent_parser], description="Search in a directory hierarchy."
+        "find",
+        parents=[parent_parser],
+        description="Search in a directory hierarchy.",
+        formatter_class=CustomHelpFormatter,
     )
     add_anon_arg(parse_find)
     add_update_arg(parse_find)
@@ -435,7 +475,10 @@ def get_parser() -> ArgumentParser:  # noqa: PLR0915
     )

     parse_index = subp.add_parser(
-        "index", parents=[parent_parser], description="Index storage location."
+        "index",
+        parents=[parent_parser],
+        description="Index storage location.",
+        formatter_class=CustomHelpFormatter,
     )
     add_anon_arg(parse_index)
     add_update_arg(parse_index)
@@ -445,6 +488,7 @@ def get_parser() -> ArgumentParser:  # noqa: PLR0915
         "show",
         parents=[parent_parser],
         description="Create a new dataset with a query script.",
+        formatter_class=CustomHelpFormatter,
     )
     show_parser.add_argument("name", type=str, help="Dataset name")
     show_parser.add_argument(
@@ -461,6 +505,7 @@ def get_parser() -> ArgumentParser:  # noqa: PLR0915
         "query",
         parents=[parent_parser],
         description="Create a new dataset with a query script.",
+        formatter_class=CustomHelpFormatter,
     )
     add_anon_arg(query_parser)
     query_parser.add_argument(
@@ -491,11 +536,15 @@ def get_parser() -> ArgumentParser:  # noqa: PLR0915
         "clear-cache",
         parents=[parent_parser],
         description="Clear the local file cache.",
+        formatter_class=CustomHelpFormatter,
     )
     add_anon_arg(parse_clear_cache)

     parse_gc = subp.add_parser(
-        "gc", parents=[parent_parser], description="Garbage collect temporary tables."
+        "gc",
+        parents=[parent_parser],
+        description="Garbage collect temporary tables.",
+        formatter_class=CustomHelpFormatter,
     )
     add_anon_arg(parse_gc)

@@ -510,6 +559,7 @@ def add_completion_parser(subparsers, parents):
         "completion",
         parents=parents,
         description="Output shell completion script.",
+        formatter_class=CustomHelpFormatter,
     )
     parser.add_argument(
         "-s",
@@ -1,8 +1,15 @@
+from datachain.cli.parser.utils import CustomHelpFormatter
+
+
 def add_jobs_parser(subparsers, parent_parser) -> None:
     jobs_help = "Manage jobs in Studio"
     jobs_description = "Commands to manage job execution in Studio."
     jobs_parser = subparsers.add_parser(
-        "job", parents=[parent_parser], description=jobs_description, help=jobs_help
+        "job",
+        parents=[parent_parser],
+        description=jobs_description,
+        help=jobs_help,
+        formatter_class=CustomHelpFormatter,
     )
     jobs_subparser = jobs_parser.add_subparsers(
         dest="cmd",
@@ -17,10 +24,11 @@ def add_jobs_parser(subparsers, parent_parser) -> None:
         parents=[parent_parser],
         description=studio_run_description,
         help=studio_run_help,
+        formatter_class=CustomHelpFormatter,
     )

     studio_run_parser.add_argument(
-        "query_file",
+        "file",
         action="store",
         help="Query file to run",
     )
@@ -78,10 +86,11 @@ def add_jobs_parser(subparsers, parent_parser) -> None:
         parents=[parent_parser],
         description=studio_cancel_description,
         help=studio_cancel_help,
+        formatter_class=CustomHelpFormatter,
     )

     studio_cancel_parser.add_argument(
-        "job_id",
+        "id",
         action="store",
         help="Job ID to cancel",
     )
@@ -100,10 +109,11 @@ def add_jobs_parser(subparsers, parent_parser) -> None:
         parents=[parent_parser],
         description=studio_log_description,
         help=studio_log_help,
+        formatter_class=CustomHelpFormatter,
     )

     studio_log_parser.add_argument(
-        "job_id",
+        "id",
         action="store",
         help="Job ID to show logs for",
     )
@@ -1,3 +1,6 @@
+from datachain.cli.parser.utils import CustomHelpFormatter
+
+
 def add_auth_parser(subparsers, parent_parser) -> None:
     from dvc_studio_client.auth import AVAILABLE_SCOPES

@@ -9,6 +12,7 @@ def add_auth_parser(subparsers, parent_parser) -> None:
         parents=[parent_parser],
         description=auth_description,
         help=auth_help,
+        formatter_class=CustomHelpFormatter,
     )
     auth_subparser = auth_parser.add_subparsers(
         dest="cmd",
@@ -27,6 +31,7 @@ def add_auth_parser(subparsers, parent_parser) -> None:
         parents=[parent_parser],
         description=auth_login_description,
         help=auth_login_help,
+        formatter_class=CustomHelpFormatter,
     )

     login_parser.add_argument(
@@ -69,6 +74,7 @@ def add_auth_parser(subparsers, parent_parser) -> None:
         parents=[parent_parser],
         description=auth_logout_description,
         help=auth_logout_help,
+        formatter_class=CustomHelpFormatter,
     )

     auth_team_help = "Set default team for Studio operations"
@@ -79,6 +85,7 @@ def add_auth_parser(subparsers, parent_parser) -> None:
         parents=[parent_parser],
         description=auth_team_description,
         help=auth_team_help,
+        formatter_class=CustomHelpFormatter,
     )
     team_parser.add_argument(
         "team_name",
@@ -100,4 +107,5 @@ def add_auth_parser(subparsers, parent_parser) -> None:
         parents=[parent_parser],
         description=auth_token_description,
         help=auth_token_help,
+        formatter_class=CustomHelpFormatter,
     )
@@ -1,4 +1,4 @@
-from argparse import Action, ArgumentParser, ArgumentTypeError
+from argparse import Action, ArgumentParser, ArgumentTypeError, HelpFormatter
 from typing import Union

 from datachain.cli.utils import CommaSeparatedArgs
@@ -6,6 +6,25 @@ from datachain.cli.utils import CommaSeparatedArgs
 FIND_COLUMNS = ["du", "name", "path", "size", "type"]


+class CustomHelpFormatter(HelpFormatter):
+    def add_arguments(self, actions):
+        # Sort arguments to move --help and --version to the end
+        normal_actions = [
+            a for a in actions if a.dest not in ("help", "verbose", "quiet")
+        ]
+        special_actions = [a for a in actions if a.dest in ("help", "verbose", "quiet")]
+        super().add_arguments(normal_actions + special_actions)
+
+
+class CustomArgumentParser(ArgumentParser):
+    def error(self, message):
+        internal_commands = ["internal-run-udf", "internal-run-udf-worker"]
+
+        hidden_portion = "".join(f"'{cmd}', " for cmd in internal_commands)
+        message = message.replace(hidden_portion, "")
+        super().error(message)
+
+
 def find_columns_type(
     columns_str: str,
     default_colums_str: str = "path",
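
The new CustomHelpFormatter reorders options so that flags whose dest is help, verbose, or quiet appear last in --help output. A small standalone sketch of the same idea (not the package's code, names are made up):

# Standalone sketch of the reordering implemented by CustomHelpFormatter above.
import argparse


class DemoFormatter(argparse.HelpFormatter):
    def add_arguments(self, actions):
        normal = [a for a in actions if a.dest not in ("help", "verbose", "quiet")]
        special = [a for a in actions if a.dest in ("help", "verbose", "quiet")]
        super().add_arguments(normal + special)


parser = argparse.ArgumentParser(prog="demo", formatter_class=DemoFormatter)
parser.add_argument("-v", "--verbose", action="count", default=0, help="Be verbose")
parser.add_argument("--team", help="Team name")
print(parser.format_help())  # --team is listed before -h/--help and -v/--verbose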
datachain/dataset.py CHANGED
@@ -181,7 +181,7 @@ class DatasetVersion:

     @classmethod
     def parse(  # noqa: PLR0913
-        cls: type[V],
+        cls,
         id: int,
         uuid: str,
         dataset_id: int,
@@ -288,7 +288,7 @@ class DatasetListVersion:

     @classmethod
     def parse(
-        cls: type[LV],
+        cls,
         id: int,
         uuid: str,
         dataset_id: int,
@@ -352,7 +352,7 @@ class DatasetRecord:

     @classmethod
     def parse(  # noqa: PLR0913
-        cls: type[T],
+        cls,
         id: int,
         name: str,
         description: Optional[str],
@@ -567,7 +567,7 @@ class DatasetListRecord:

     @classmethod
     def parse(  # noqa: PLR0913
-        cls: type[LT],
+        cls,
         id: int,
         name: str,
         description: Optional[str],
@@ -628,6 +628,9 @@ class DatasetListRecord:
         self.versions.sort(key=lambda v: v.version)
         return self

+    def latest_version(self) -> DatasetListVersion:
+        return max(self.versions, key=lambda v: v.version)
+
     @property
     def is_bucket_listing(self) -> bool:
         """