datachain 0.7.2__py3-none-any.whl → 0.7.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of datachain might be problematic.

datachain/catalog/catalog.py CHANGED
@@ -38,6 +38,7 @@ from datachain.dataset import (
     DATASET_PREFIX,
     QUERY_DATASET_PREFIX,
     DatasetDependency,
+    DatasetListRecord,
     DatasetRecord,
     DatasetStats,
     DatasetStatus,
@@ -72,7 +73,7 @@ if TYPE_CHECKING:
         AbstractMetastore,
         AbstractWarehouse,
     )
-    from datachain.dataset import DatasetVersion
+    from datachain.dataset import DatasetListVersion
     from datachain.job import Job
     from datachain.lib.file import File
     from datachain.listing import Listing
@@ -1135,7 +1136,7 @@ class Catalog:
 
         return direct_dependencies
 
-    def ls_datasets(self, include_listing: bool = False) -> Iterator[DatasetRecord]:
+    def ls_datasets(self, include_listing: bool = False) -> Iterator[DatasetListRecord]:
         datasets = self.metastore.list_datasets()
         for d in datasets:
             if not d.is_bucket_listing or include_listing:
@@ -1144,7 +1145,7 @@
     def list_datasets_versions(
         self,
        include_listing: bool = False,
-    ) -> Iterator[tuple[DatasetRecord, "DatasetVersion", Optional["Job"]]]:
+    ) -> Iterator[tuple[DatasetListRecord, "DatasetListVersion", Optional["Job"]]]:
         """Iterate over all dataset versions with related jobs."""
         datasets = list(self.ls_datasets(include_listing=include_listing))
 
datachain/cli.py CHANGED
@@ -18,7 +18,12 @@ from datachain.cli_utils import BooleanOptionalAction, CommaSeparatedArgs, KeyVa
 from datachain.config import Config
 from datachain.error import DataChainError
 from datachain.lib.dc import DataChain
-from datachain.studio import list_datasets, process_studio_cli_args
+from datachain.studio import (
+    edit_studio_dataset,
+    list_datasets,
+    process_studio_cli_args,
+    remove_studio_dataset,
+)
 from datachain.telemetry import telemetry
 
 if TYPE_CHECKING:
@@ -403,21 +408,44 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
     parse_edit_dataset.add_argument(
         "--new-name",
         action="store",
-        default="",
         help="Dataset new name",
     )
     parse_edit_dataset.add_argument(
         "--description",
         action="store",
-        default="",
         help="Dataset description",
     )
     parse_edit_dataset.add_argument(
         "--labels",
-        default=[],
         nargs="+",
         help="Dataset labels",
     )
+    parse_edit_dataset.add_argument(
+        "--studio",
+        action="store_true",
+        default=False,
+        help="Edit dataset from Studio",
+    )
+    parse_edit_dataset.add_argument(
+        "-L",
+        "--local",
+        action="store_true",
+        default=False,
+        help="Edit local dataset only",
+    )
+    parse_edit_dataset.add_argument(
+        "-a",
+        "--all",
+        action="store_true",
+        default=True,
+        help="Edit both datasets from studio and local",
+    )
+    parse_edit_dataset.add_argument(
+        "--team",
+        action="store",
+        default=None,
+        help="The team to edit a dataset. By default, it will use team from config.",
+    )
 
     datasets_parser = subp.add_parser(
         "datasets", parents=[parent_parser], description="List datasets"
@@ -466,6 +494,32 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
         action=BooleanOptionalAction,
         help="Force delete registered dataset with all of it's versions",
     )
+    rm_dataset_parser.add_argument(
+        "--studio",
+        action="store_true",
+        default=False,
+        help="Remove dataset from Studio",
+    )
+    rm_dataset_parser.add_argument(
+        "-L",
+        "--local",
+        action="store_true",
+        default=False,
+        help="Remove local datasets only",
+    )
+    rm_dataset_parser.add_argument(
+        "-a",
+        "--all",
+        action="store_true",
+        default=True,
+        help="Remove both local and studio",
+    )
+    rm_dataset_parser.add_argument(
+        "--team",
+        action="store",
+        default=None,
+        help="The team to delete a dataset. By default, it will use team from config.",
+    )
 
     dataset_stats_parser = subp.add_parser(
         "dataset-stats",
@@ -909,8 +963,40 @@ def rm_dataset(
     name: str,
     version: Optional[int] = None,
     force: Optional[bool] = False,
+    studio: bool = False,
+    local: bool = False,
+    all: bool = True,
+    team: Optional[str] = None,
+):
+    token = Config().read().get("studio", {}).get("token")
+    all, local, studio = _determine_flavors(studio, local, all, token)
+
+    if all or local:
+        catalog.remove_dataset(name, version=version, force=force)
+
+    if (all or studio) and token:
+        remove_studio_dataset(team, name, version, force)
+
+
+def edit_dataset(
+    catalog: "Catalog",
+    name: str,
+    new_name: Optional[str] = None,
+    description: Optional[str] = None,
+    labels: Optional[list[str]] = None,
+    studio: bool = False,
+    local: bool = False,
+    all: bool = True,
+    team: Optional[str] = None,
 ):
-    catalog.remove_dataset(name, version=version, force=force)
+    token = Config().read().get("studio", {}).get("token")
+    all, local, studio = _determine_flavors(studio, local, all, token)
+
+    if all or local:
+        catalog.edit_dataset(name, new_name, description, labels)
+
+    if (all or studio) and token:
+        edit_studio_dataset(team, name, new_name, description, labels)
 
 
 def dataset_stats(
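
The new `edit_dataset` and `rm_dataset` helpers fan each operation out to the local catalog, Studio, or both, depending on the `--studio`, `--local`, and `--all` flags and whether a Studio token is configured. A hedged usage sketch of those helpers follows; the catalog factory import and the dataset/team names are illustrative assumptions, and the exact flag interplay is decided by `_determine_flavors`, which is not part of this diff.

    from datachain.catalog import get_catalog  # assumed factory, not shown in this diff
    from datachain.cli import edit_dataset, rm_dataset

    catalog = get_catalog()

    # Rename and relabel only the local copy of a dataset.
    edit_dataset(
        catalog,
        "my-dataset",
        new_name="my-dataset-v2",
        labels=["golden"],
        local=True,
        all=False,
    )

    # Remove one version both locally and in Studio (assumes a saved Studio token).
    rm_dataset(
        catalog,
        "my-dataset-v2",
        version=1,
        studio=True,
        all=False,
        team="my-team",
    )
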
@@ -1127,11 +1213,16 @@ def main(argv: Optional[list[str]] = None) -> int: # noqa: C901, PLR0912, PLR09
             edatachain_file=args.edatachain_file,
         )
     elif args.command == "edit-dataset":
-        catalog.edit_dataset(
+        edit_dataset(
+            catalog,
             args.name,
-            description=args.description,
             new_name=args.new_name,
+            description=args.description,
             labels=args.labels,
+            studio=args.studio,
+            local=args.local,
+            all=args.all,
+            team=args.team,
         )
     elif args.command == "ls":
         ls(
@@ -1164,7 +1255,16 @@ def main(argv: Optional[list[str]] = None) -> int: # noqa: C901, PLR0912, PLR09
             schema=args.schema,
         )
     elif args.command == "rm-dataset":
-        rm_dataset(catalog, args.name, version=args.version, force=args.force)
+        rm_dataset(
+            catalog,
+            args.name,
+            version=args.version,
+            force=args.force,
+            studio=args.studio,
+            local=args.local,
+            all=args.all,
+            team=args.team,
+        )
     elif args.command == "dataset-stats":
         dataset_stats(
             catalog,
datachain/data_storage/metastore.py CHANGED
@@ -27,6 +27,8 @@ from datachain.data_storage import JobQueryType, JobStatus
 from datachain.data_storage.serializer import Serializable
 from datachain.dataset import (
     DatasetDependency,
+    DatasetListRecord,
+    DatasetListVersion,
     DatasetRecord,
     DatasetStatus,
     DatasetVersion,
@@ -59,6 +61,8 @@ class AbstractMetastore(ABC, Serializable):
 
     schema: "schema.Schema"
     dataset_class: type[DatasetRecord] = DatasetRecord
+    dataset_list_class: type[DatasetListRecord] = DatasetListRecord
+    dataset_list_version_class: type[DatasetListVersion] = DatasetListVersion
     dependency_class: type[DatasetDependency] = DatasetDependency
     job_class: type[Job] = Job
 
@@ -166,11 +170,11 @@ class AbstractMetastore(ABC, Serializable):
         """
 
     @abstractmethod
-    def list_datasets(self) -> Iterator[DatasetRecord]:
+    def list_datasets(self) -> Iterator[DatasetListRecord]:
         """Lists all datasets."""
 
     @abstractmethod
-    def list_datasets_by_prefix(self, prefix: str) -> Iterator["DatasetRecord"]:
+    def list_datasets_by_prefix(self, prefix: str) -> Iterator["DatasetListRecord"]:
         """Lists all datasets which names start with prefix."""
 
     @abstractmethod
@@ -348,6 +352,14 @@ class AbstractDBMetastore(AbstractMetastore):
             if c.name  # type: ignore [attr-defined]
         ]
 
+    @cached_property
+    def _dataset_list_fields(self) -> list[str]:
+        return [
+            c.name  # type: ignore [attr-defined]
+            for c in self._datasets_columns()
+            if c.name in self.dataset_list_class.__dataclass_fields__  # type: ignore [attr-defined]
+        ]
+
     @classmethod
     def _datasets_versions_columns(cls) -> list["SchemaItem"]:
         """Datasets versions table columns."""
@@ -390,6 +402,15 @@ class AbstractDBMetastore(AbstractMetastore):
             if c.name  # type: ignore [attr-defined]
         ]
 
+    @cached_property
+    def _dataset_list_version_fields(self) -> list[str]:
+        return [
+            c.name  # type: ignore [attr-defined]
+            for c in self._datasets_versions_columns()
+            if c.name  # type: ignore [attr-defined]
+            in self.dataset_list_version_class.__dataclass_fields__
+        ]
+
     @classmethod
     def _datasets_dependencies_columns(cls) -> list["SchemaItem"]:
         """Datasets dependencies table columns."""
@@ -664,14 +685,25 @@ class AbstractDBMetastore(AbstractMetastore):
             return None
         return reduce(lambda ds, version: ds.merge_versions(version), versions)
 
-    def _parse_datasets(self, rows) -> Iterator["DatasetRecord"]:
+    def _parse_list_dataset(self, rows) -> Optional[DatasetListRecord]:
+        versions = [self.dataset_list_class.parse(*r) for r in rows]
+        if not versions:
+            return None
+        return reduce(lambda ds, version: ds.merge_versions(version), versions)
+
+    def _parse_dataset_list(self, rows) -> Iterator["DatasetListRecord"]:
         # grouping rows by dataset id
         for _, g in groupby(rows, lambda r: r[0]):
-            dataset = self._parse_dataset(list(g))
+            dataset = self._parse_list_dataset(list(g))
             if dataset:
                 yield dataset
 
-    def _base_dataset_query(self):
+    def _get_dataset_query(
+        self,
+        dataset_fields: list[str],
+        dataset_version_fields: list[str],
+        isouter: bool = True,
+    ):
         if not (
             self.db.has_table(self._datasets.name)
             and self.db.has_table(self._datasets_versions.name)
@@ -680,23 +712,36 @@ class AbstractDBMetastore(AbstractMetastore):
 
         d = self._datasets
         dv = self._datasets_versions
+
         query = self._datasets_select(
-            *(getattr(d.c, f) for f in self._dataset_fields),
-            *(getattr(dv.c, f) for f in self._dataset_version_fields),
+            *(getattr(d.c, f) for f in dataset_fields),
+            *(getattr(dv.c, f) for f in dataset_version_fields),
         )
-        j = d.join(dv, d.c.id == dv.c.dataset_id, isouter=True)
+        j = d.join(dv, d.c.id == dv.c.dataset_id, isouter=isouter)
         return query.select_from(j)
 
-    def list_datasets(self) -> Iterator["DatasetRecord"]:
+    def _base_dataset_query(self):
+        return self._get_dataset_query(
+            self._dataset_fields, self._dataset_version_fields
+        )
+
+    def _base_list_datasets_query(self):
+        return self._get_dataset_query(
+            self._dataset_list_fields, self._dataset_list_version_fields, isouter=False
+        )
+
+    def list_datasets(self) -> Iterator["DatasetListRecord"]:
         """Lists all datasets."""
-        yield from self._parse_datasets(self.db.execute(self._base_dataset_query()))
+        yield from self._parse_dataset_list(
+            self.db.execute(self._base_list_datasets_query())
+        )
 
     def list_datasets_by_prefix(
         self, prefix: str, conn=None
-    ) -> Iterator["DatasetRecord"]:
-        query = self._base_dataset_query()
+    ) -> Iterator["DatasetListRecord"]:
+        query = self._base_list_datasets_query()
         query = query.where(self._datasets.c.name.startswith(prefix))
-        yield from self._parse_datasets(self.db.execute(query))
+        yield from self._parse_dataset_list(self.db.execute(query))
 
     def get_dataset(self, name: str, conn=None) -> DatasetRecord:
         """Gets a single dataset by name"""
datachain/dataset.py CHANGED
@@ -2,6 +2,7 @@ import builtins
 import json
 from dataclasses import dataclass, fields
 from datetime import datetime
+from functools import cached_property
 from typing import (
     Any,
     NewType,
@@ -11,11 +12,15 @@ from typing import (
 )
 from urllib.parse import urlparse
 
+import orjson
+
 from datachain.error import DatasetVersionNotFoundError
 from datachain.sql.types import NAME_TYPES_MAPPING, SQLType
 
 T = TypeVar("T", bound="DatasetRecord")
+LT = TypeVar("LT", bound="DatasetListRecord")
 V = TypeVar("V", bound="DatasetVersion")
+LV = TypeVar("LV", bound="DatasetListVersion")
 DD = TypeVar("DD", bound="DatasetDependency")
 
 DATASET_PREFIX = "ds://"
@@ -176,7 +181,7 @@ class DatasetVersion:
     schema: dict[str, Union[SQLType, type[SQLType]]]
     num_objects: Optional[int]
     size: Optional[int]
-    preview: Optional[list[dict]]
+    _preview_data: Optional[Union[str, list[dict]]]
     sources: str = ""
     query_script: str = ""
     job_id: Optional[str] = None
@@ -197,7 +202,7 @@ class DatasetVersion:
         script_output: str,
         num_objects: Optional[int],
         size: Optional[int],
-        preview: Optional[str],
+        preview: Optional[Union[str, list[dict]]],
         schema: dict[str, Union[SQLType, type[SQLType]]],
         sources: str = "",
         query_script: str = "",
@@ -218,7 +223,7 @@ class DatasetVersion:
             schema,
             num_objects,
             size,
-            json.loads(preview) if preview else None,
+            preview,
             sources,
             query_script,
             job_id,
@@ -258,12 +263,73 @@ class DatasetVersion:
             for c_name, c_type in self.schema.items()
         }
 
+    @cached_property
+    def preview(self) -> Optional[list[dict]]:
+        if isinstance(self._preview_data, str):
+            return orjson.loads(self._preview_data)
+        return self._preview_data if self._preview_data else None
+
     @classmethod
     def from_dict(cls, d: dict[str, Any]) -> "DatasetVersion":
         kwargs = {f.name: d[f.name] for f in fields(cls) if f.name in d}
+        if not hasattr(kwargs, "_preview_data"):
+            kwargs["_preview_data"] = d.get("preview")
         return cls(**kwargs)
 
 
+@dataclass
+class DatasetListVersion:
+    id: int
+    uuid: str
+    dataset_id: int
+    version: int
+    status: int
+    created_at: datetime
+    finished_at: Optional[datetime]
+    error_message: str
+    error_stack: str
+    num_objects: Optional[int]
+    size: Optional[int]
+    query_script: str = ""
+    job_id: Optional[str] = None
+
+    @classmethod
+    def parse(
+        cls: type[LV],
+        id: int,
+        uuid: str,
+        dataset_id: int,
+        version: int,
+        status: int,
+        created_at: datetime,
+        finished_at: Optional[datetime],
+        error_message: str,
+        error_stack: str,
+        num_objects: Optional[int],
+        size: Optional[int],
+        query_script: str = "",
+        job_id: Optional[str] = None,
+    ):
+        return cls(
+            id,
+            uuid,
+            dataset_id,
+            version,
+            status,
+            created_at,
+            finished_at,
+            error_message,
+            error_stack,
+            num_objects,
+            size,
+            query_script,
+            job_id,
+        )
+
+    def __hash__(self):
+        return hash(f"{self.dataset_id}_{self.version}")
+
+
 @dataclass
 class DatasetRecord:
     id: int
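
`DatasetVersion` now stores the raw preview payload and defers JSON decoding until the `preview` property is first read, caching the result. A minimal standalone sketch of that lazy-decode pattern; it uses the standard `json` module instead of `orjson` so the example carries no extra dependency.

    import json
    from dataclasses import dataclass
    from functools import cached_property
    from typing import Optional, Union

    @dataclass
    class VersionStub:
        # Raw value as it comes from the database: a JSON string, a list, or None.
        _preview_data: Optional[Union[str, list[dict]]]

        @cached_property
        def preview(self) -> Optional[list[dict]]:
            # Decode once on first access; later reads reuse the cached value.
            if isinstance(self._preview_data, str):
                return json.loads(self._preview_data)
            return self._preview_data or None

    v = VersionStub('[{"file": "a.jpg"}]')
    print(v.preview)             # [{'file': 'a.jpg'}]
    print(v.preview is v.preview)  # True: parsed only once
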
@@ -447,20 +513,6 @@ class DatasetRecord:
         identifier = self.identifier(version)
         return f"{DATASET_PREFIX}{identifier}"
 
-    @property
-    def is_bucket_listing(self) -> bool:
-        """
-        For bucket listing we implicitly create underlying dataset to hold data. This
-        method is checking if this is one of those datasets.
-        """
-        from datachain.client import Client
-
-        # TODO refactor and maybe remove method in
-        # https://github.com/iterative/datachain/issues/318
-        return Client.is_data_source_uri(self.name) or self.name.startswith(
-            LISTING_PREFIX
-        )
-
     @property
     def versions_values(self) -> list[int]:
         """
@@ -499,5 +551,92 @@ class DatasetRecord:
         return cls(**kwargs, versions=versions)
 
 
+@dataclass
+class DatasetListRecord:
+    id: int
+    name: str
+    description: Optional[str]
+    labels: list[str]
+    versions: list[DatasetListVersion]
+    created_at: Optional[datetime] = None
+
+    @classmethod
+    def parse(  # noqa: PLR0913
+        cls: type[LT],
+        id: int,
+        name: str,
+        description: Optional[str],
+        labels: str,
+        created_at: datetime,
+        version_id: int,
+        version_uuid: str,
+        version_dataset_id: int,
+        version: int,
+        version_status: int,
+        version_created_at: datetime,
+        version_finished_at: Optional[datetime],
+        version_error_message: str,
+        version_error_stack: str,
+        version_num_objects: Optional[int],
+        version_size: Optional[int],
+        version_query_script: Optional[str],
+        version_job_id: Optional[str] = None,
+    ) -> "DatasetListRecord":
+        labels_lst: list[str] = json.loads(labels) if labels else []
+
+        dataset_version = DatasetListVersion.parse(
+            version_id,
+            version_uuid,
+            version_dataset_id,
+            version,
+            version_status,
+            version_created_at,
+            version_finished_at,
+            version_error_message,
+            version_error_stack,
+            version_num_objects,
+            version_size,
+            version_query_script,  # type: ignore[arg-type]
+            version_job_id,
+        )
+
+        return cls(
+            id,
+            name,
+            description,
+            labels_lst,
+            [dataset_version],
+            created_at,
+        )
+
+    def merge_versions(self, other: "DatasetListRecord") -> "DatasetListRecord":
+        """Merge versions from another dataset"""
+        if other.id != self.id:
+            raise RuntimeError("Cannot merge versions of datasets with different ids")
+        if not other.versions:
+            # nothing to merge
+            return self
+        if not self.versions:
+            self.versions = []
+
+        self.versions = list(set(self.versions + other.versions))
+        self.versions.sort(key=lambda v: v.version)
+        return self
+
+    @property
+    def is_bucket_listing(self) -> bool:
+        """
+        For bucket listing we implicitly create underlying dataset to hold data. This
+        method is checking if this is one of those datasets.
+        """
+        from datachain.client import Client
+
+        # TODO refactor and maybe remove method in
+        # https://github.com/iterative/datachain/issues/318
+        return Client.is_data_source_uri(self.name) or self.name.startswith(
+            LISTING_PREFIX
+        )
+
+
 class RowDict(dict):
     pass
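
`DatasetListRecord.merge_versions` collapses one joined row per (dataset, version) into a single record, deduplicating versions via `DatasetListVersion.__hash__`. A hedged sketch of that behavior against the new classes; every field value below is a placeholder, and only the merge/dedup mechanics matter.

    from datetime import datetime, timezone

    from datachain.dataset import DatasetListRecord, DatasetListVersion

    CREATED = datetime(2024, 1, 1, tzinfo=timezone.utc)  # fixed placeholder timestamp

    def version_stub(version: int) -> DatasetListVersion:
        # Helper for the example only; fills required fields with placeholders.
        return DatasetListVersion(
            id=version,
            uuid=f"uuid-{version}",
            dataset_id=1,
            version=version,
            status=4,  # placeholder status code
            created_at=CREATED,
            finished_at=CREATED,
            error_message="",
            error_stack="",
            num_objects=None,
            size=None,
        )

    a = DatasetListRecord(1, "cats", None, [], [version_stub(1)])
    b = DatasetListRecord(1, "cats", None, [], [version_stub(2), version_stub(1)])

    merged = a.merge_versions(b)
    print([v.version for v in merged.versions])  # [1, 2] -- deduplicated and sorted
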
datachain/lib/dataset_info.py CHANGED
@@ -5,7 +5,11 @@ from uuid import uuid4
 
 from pydantic import Field, field_validator
 
-from datachain.dataset import DatasetRecord, DatasetStatus, DatasetVersion
+from datachain.dataset import (
+    DatasetListRecord,
+    DatasetListVersion,
+    DatasetStatus,
+)
 from datachain.job import Job
 from datachain.lib.data_model import DataModel
 from datachain.utils import TIME_ZERO
@@ -57,8 +61,8 @@ class DatasetInfo(DataModel):
     @classmethod
     def from_models(
         cls,
-        dataset: DatasetRecord,
-        version: DatasetVersion,
+        dataset: DatasetListRecord,
+        version: DatasetListVersion,
         job: Optional[Job],
     ) -> "Self":
         return cls(
datachain/remote/studio.py CHANGED
@@ -178,17 +178,9 @@ class StudioClient:
             data = {}
 
         if not ok:
-            logger.error(
-                "Got bad response from Studio, content is %s",
-                response.content.decode("utf-8"),
-            )
             if response.status_code == 403:
                 message = f"Not authorized for the team {self.team}"
             else:
-                logger.error(
-                    "Got bad response from Studio, content is %s",
-                    response.content.decode("utf-8"),
-                )
                 message = data.get("message", "")
         else:
             message = ""
@@ -230,6 +222,46 @@ class StudioClient:
     def ls_datasets(self) -> Response[LsData]:
         return self._send_request("datachain/ls-datasets", {})
 
+    def edit_dataset(
+        self,
+        name: str,
+        new_name: Optional[str] = None,
+        description: Optional[str] = None,
+        labels: Optional[list[str]] = None,
+    ) -> Response[DatasetInfoData]:
+        body = {
+            "dataset_name": name,
+        }
+
+        if new_name is not None:
+            body["new_name"] = new_name
+
+        if description is not None:
+            body["description"] = description
+
+        if labels is not None:
+            body["labels"] = labels  # type: ignore[assignment]
+
+        return self._send_request(
+            "datachain/edit-dataset",
+            body,
+        )
+
+    def rm_dataset(
+        self,
+        name: str,
+        version: Optional[int] = None,
+        force: Optional[bool] = False,
+    ) -> Response[DatasetInfoData]:
+        return self._send_request(
+            "datachain/rm-dataset",
+            {
+                "dataset_name": name,
+                "version": version,
+                "force": force,
+            },
+        )
+
     def dataset_info(self, name: str) -> Response[DatasetInfoData]:
         def _parse_dataset_info(dataset_info):
             _parse_dates(dataset_info, ["created_at", "finished_at"])
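
The two new `StudioClient` methods wrap the `datachain/edit-dataset` and `datachain/rm-dataset` endpoints; `edit_dataset` only sends the fields that were actually provided. A hedged usage sketch: the team and dataset names are placeholders, and a valid Studio token is assumed to be configured.

    from datachain.remote.studio import StudioClient

    client = StudioClient(team="my-team")

    # Rename a dataset and replace its labels; unset fields are omitted from the body.
    response = client.edit_dataset(
        "my-dataset",
        new_name="my-dataset-v2",
        labels=["curated"],
    )
    if not response.ok:
        raise RuntimeError(response.message)

    # Drop a single version of the renamed dataset.
    response = client.rm_dataset("my-dataset-v2", version=1)
    if not response.ok:
        raise RuntimeError(response.message)
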
datachain/studio.py CHANGED
@@ -130,6 +130,35 @@ def list_datasets(team: Optional[str] = None):
         yield (name, version)
 
 
+def edit_studio_dataset(
+    team_name: Optional[str],
+    name: str,
+    new_name: Optional[str] = None,
+    description: Optional[str] = None,
+    labels: Optional[list[str]] = None,
+):
+    client = StudioClient(team=team_name)
+    response = client.edit_dataset(name, new_name, description, labels)
+    if not response.ok:
+        raise_remote_error(response.message)
+
+    print(f"Dataset {name} updated")
+
+
+def remove_studio_dataset(
+    team_name: Optional[str],
+    name: str,
+    version: Optional[int] = None,
+    force: Optional[bool] = False,
+):
+    client = StudioClient(team=team_name)
+    response = client.rm_dataset(name, version, force)
+    if not response.ok:
+        raise_remote_error(response.message)
+
+    print(f"Dataset {name} removed")
+
+
 def save_config(hostname, token):
     config = Config(ConfigLevel.GLOBAL)
     with config.edit() as conf:
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: datachain
-Version: 0.7.2
+Version: 0.7.4
 Summary: Wrangle unstructured AI data at scale
 Author-email: Dmitry Petrov <support@dvc.org>
 License: Apache-2.0
@@ -139,7 +139,7 @@ Key Features
 ============
 
 📂 **Multimodal Dataset Versioning.**
-  - Version unstructured data without redundant data copies, by supporitng
+  - Version unstructured data without redundant data copies, by supporting
     references to S3, GCP, Azure, and local file systems.
   - Multimodal data support: images, video, text, PDFs, JSONs, CSVs, parquet, etc.
   - Unite files and metadata together into persistent, versioned, columnar datasets.
@@ -2,10 +2,10 @@ datachain/__init__.py,sha256=ofPJ6B-d-ybSDRrE7J6wqF_ZRAB2W9U8l-eeuBtqPLg,865
 datachain/__main__.py,sha256=hG3Y4ARGEqe1AWwNMd259rBlqtphx1Wk39YbueQ0yV8,91
 datachain/asyn.py,sha256=5aKrjnUxk0mtnZeFKNJd1DCE0MsnSoyJBZkr0y9H_a0,9313
 datachain/cache.py,sha256=s0YHN7qurmQv-eC265TjeureK84TebWWAnL07cxchZQ,2997
-datachain/cli.py,sha256=weZDEj4Kkgi9vqzqJdQcX_jSymSINHbbZjjTqu1RHa4,36685
+datachain/cli.py,sha256=1hiBClE1kbRyx0DK3uX5KMVa0ktbsG6TsFSNvoT2xxs,39399
 datachain/cli_utils.py,sha256=jrn9ejGXjybeO1ur3fjdSiAyCHZrX0qsLLbJzN9ErPM,2418
 datachain/config.py,sha256=g8qbNV0vW2VEKpX-dGZ9pAn0DAz6G2ZFcr7SAV3PoSM,4272
-datachain/dataset.py,sha256=0IN-5y723y-bnFlieKtOFZLCjwX_yplFo3q0DV7LRPw,14821
+datachain/dataset.py,sha256=PKHaEXeYOL2gE5BaEmc9rzPJdDg5O9X8_7FvSh_Q9Vg,18614
 datachain/error.py,sha256=bxAAL32lSeMgzsQDEHbGTGORj-mPzzpCRvWDPueJNN4,1092
 datachain/job.py,sha256=Jt4sNutMHJReaGsj3r3scueN5aESLGfhimAa8pUP7Is,1271
 datachain/listing.py,sha256=TgKg25ZWAP5enzKgw2_2GUPJVdnQUh6uySHB5SJrUY4,7773
@@ -14,11 +14,11 @@ datachain/nodes_fetcher.py,sha256=ILMzUW5o4_6lUOVrLDC9gJPCXfcgKnMG68plrc7dAOA,11
 datachain/nodes_thread_pool.py,sha256=uPo-xl8zG5m9YgODjPFBpbcqqHjI-dcxH87yAbj_qco,3192
 datachain/progress.py,sha256=5KotcvvzAUL_RF0GEj4JY0IB1lyImnmHxe89YkT1XO4,4330
 datachain/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datachain/studio.py,sha256=6kxF7VxPAbh9D7_Bk8_SghS5OXrwUwSpDaw19eNCTP4,4083
+datachain/studio.py,sha256=w41vgVPrBfJ02XQOaDccLbh-1uSAfq9cAgOmkYUqExE,4845
 datachain/telemetry.py,sha256=0A4IOPPp9VlP5pyW9eBfaTK3YhHGzHl7dQudQjUAx9A,994
 datachain/utils.py,sha256=-mSFowjIidJ4_sMXInvNHLn4rK_QnHuIlLuH1_lMGmI,13897
 datachain/catalog/__init__.py,sha256=g2iAAFx_gEIrqshXlhSEbrc8qDaEH11cjU40n3CHDz4,409
-datachain/catalog/catalog.py,sha256=sWljYCIpvUR3eCeYg4GTZXfyn5ropZVkfEPocc9m7KE,57941
+datachain/catalog/catalog.py,sha256=l_HAxor5i_F03VvbmMuwhi4INhsmNrqubyydPhXWo2Y,57980
 datachain/catalog/datasource.py,sha256=D-VWIVDCM10A8sQavLhRXdYSCG7F4o4ifswEF80_NAQ,1412
 datachain/catalog/loader.py,sha256=-6VelNfXUdgUnwInVyA8g86Boxv2xqhTh9xNS-Zlwig,8242
 datachain/client/__init__.py,sha256=T4wiYL9KIM0ZZ_UqIyzV8_ufzYlewmizlV4iymHNluE,86
@@ -33,7 +33,7 @@ datachain/data_storage/__init__.py,sha256=cEOJpyu1JDZtfUupYucCDNFI6e5Wmp_Oyzq6rZ
 datachain/data_storage/db_engine.py,sha256=81Ol1of9TTTzD97ORajCnP366Xz2mEJt6C-kTUCaru4,3406
 datachain/data_storage/id_generator.py,sha256=lCEoU0BM37Ai2aRpSbwo5oQT0GqZnSpYwwvizathRMQ,4292
 datachain/data_storage/job.py,sha256=w-7spowjkOa1P5fUVtJou3OltT0L48P0RYWZ9rSJ9-s,383
-datachain/data_storage/metastore.py,sha256=5b7o_CSHC2djottebYn-Hq5q0yaSLOKPIRCnaVRvjsU,36056
+datachain/data_storage/metastore.py,sha256=VPq-Dl8P-RbZQMzn6vB9aXBPKUWPTwP8ypkaVfE-7PU,37661
 datachain/data_storage/schema.py,sha256=-QVlRvD0dfu-ZFUxylEoSnLJLnleMEjVlcAb2OGu-AY,9895
 datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2krunWUum5o,927
 datachain/data_storage/sqlite.py,sha256=nF-2B-n8YZh9cJlZv4XnbahAJDW6pvrp1h9L-140M7A,27538
@@ -52,7 +52,7 @@ datachain/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datachain/lib/arrow.py,sha256=b5efxAUaNNYVwtXVJqj07D3zf5KC-BPlLCxKEZbEG6w,9429
 datachain/lib/clip.py,sha256=lm5CzVi4Cj1jVLEKvERKArb-egb9j1Ls-fwTItT6vlI,6150
 datachain/lib/data_model.py,sha256=zS4lmXHVBXc9ntcyea2a1CRLXGSAN_0glXcF88CohgY,2685
-datachain/lib/dataset_info.py,sha256=3APfNYMWizIwXhgRYpMQKSeVntNAvQuBbbB25dV7mgY,2460
+datachain/lib/dataset_info.py,sha256=IjdF1E0TQNOq9YyynfWiCFTeZpbyGfyJvxgJY4YN810,2493
 datachain/lib/dc.py,sha256=J7liATKQBJCkeHanVLr0s3d1t5wxiiiSJuSbuxKBbLg,89527
 datachain/lib/file.py,sha256=-XMkL6ED1sE7TMhWoMRTEuOXswZJw8X6AEmJDONFP74,15019
 datachain/lib/hf.py,sha256=a-zFpDmZIR4r8dlNNTjfpAKSnuJ9xyRXlgcdENiXt3E,5864
@@ -96,7 +96,7 @@ datachain/query/queue.py,sha256=waqM_KzavU8C-G95-4211Nd4GXna_u2747Chgwtgz2w,3839
 datachain/query/schema.py,sha256=b_KnVy6B26Ol4nYG0LqNNpeQ1QYPk95YRGUjXfdaQWs,6606
 datachain/query/session.py,sha256=50SOdLNCjqHHKI-L4xGXyzTVxzMWfANqKqjeYre-c2k,5959
 datachain/remote/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datachain/remote/studio.py,sha256=g88kHdlRhmruiWwoIxq_JJoymZUrtMAL937NWQyWyXI,9209
+datachain/remote/studio.py,sha256=z9DTDqfdWKT8MC23wRDTOHvI8hc_OySS1Ce3F617gjA,9906
 datachain/sql/__init__.py,sha256=6SQRdbljO3d2hx3EAVXEZrHQKv5jth0Jh98PogT59No,262
 datachain/sql/selectable.py,sha256=cTc60qVoAwqqss0Vop8Lt5Z-ROnM1XrQmL_GLjRxhXs,1765
 datachain/sql/types.py,sha256=ASSPkmM5EzdRindqj2O7WHLXq8VHAgFYedG8lYfGvVI,14045
@@ -117,9 +117,9 @@ datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR
 datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
 datachain/toolkit/split.py,sha256=ZgDcrNiKiPXZmKD591_1z9qRIXitu5zwAsoVPB7ykiU,2508
 datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
-datachain-0.7.2.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
-datachain-0.7.2.dist-info/METADATA,sha256=FuinZ-OIUuKz_b26-eirZl4hJdHJ4oOa8MO-LxzGywc,18006
-datachain-0.7.2.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
-datachain-0.7.2.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
-datachain-0.7.2.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
-datachain-0.7.2.dist-info/RECORD,,
+datachain-0.7.4.dist-info/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
+datachain-0.7.4.dist-info/METADATA,sha256=tr5ReyIE9nUfhvCwuGujJC1MmfO07A10N1sLfvOBcYQ,18006
+datachain-0.7.4.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
+datachain-0.7.4.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
+datachain-0.7.4.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
+datachain-0.7.4.dist-info/RECORD,,