datachain 0.19.2__py3-none-any.whl → 0.20.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datachain might be problematic. See the release advisory for this version for more details.

@@ -3,7 +3,7 @@ import os
3
3
  import sqlite3
4
4
  from collections.abc import Iterable, Sequence
5
5
  from contextlib import contextmanager
6
- from functools import wraps
6
+ from functools import cached_property, wraps
7
7
  from time import sleep
8
8
  from typing import (
9
9
  TYPE_CHECKING,
@@ -15,7 +15,15 @@ from typing import (
15
15
  )
16
16
 
17
17
  import sqlalchemy
18
- from sqlalchemy import MetaData, Table, UniqueConstraint, exists, select
18
+ from sqlalchemy import (
19
+ Column,
20
+ Integer,
21
+ MetaData,
22
+ Table,
23
+ UniqueConstraint,
24
+ exists,
25
+ select,
26
+ )
19
27
  from sqlalchemy.dialects import sqlite
20
28
  from sqlalchemy.schema import CreateIndex, CreateTable, DropTable
21
29
  from sqlalchemy.sql import func
@@ -30,7 +38,9 @@ from datachain.data_storage import AbstractDBMetastore, AbstractWarehouse
30
38
  from datachain.data_storage.db_engine import DatabaseEngine
31
39
  from datachain.data_storage.schema import DefaultSchema
32
40
  from datachain.dataset import DatasetRecord, StorageURI
33
- from datachain.error import DataChainError
41
+ from datachain.error import DataChainError, OutdatedDatabaseSchemaError
42
+ from datachain.namespace import Namespace
43
+ from datachain.project import Project
34
44
  from datachain.sql.sqlite import create_user_defined_sql_functions, sqlite_dialect
35
45
  from datachain.sql.sqlite.base import load_usearch_extension
36
46
  from datachain.sql.types import SQLType
@@ -60,6 +70,14 @@ datachain.sql.sqlite.setup()
60
70
  quote_schema = sqlite_dialect.identifier_preparer.quote_schema
61
71
  quote = sqlite_dialect.identifier_preparer.quote
62
72
 
73
+ # NOTE! This should be manually increased when we change our DB schema in codebase
74
+ SCHEMA_VERSION = 1
75
+
76
+ OUTDATED_SCHEMA_ERROR_MESSAGE = (
77
+ "You have an old version of the database schema. Please refer to the documentation"
78
+ " for more information."
79
+ )
80
+
63
81
 
64
82
  def _get_in_memory_uri():
65
83
  return "file::memory:?cache=shared"
@@ -303,6 +321,11 @@ class SQLiteDatabaseEngine(DatabaseEngine):
303
321
  )
304
322
  return bool(next(self.execute(query))[0])
305
323
 
324
+ @property
325
+ def table_names(self) -> list[str]:
326
+ query = "SELECT name FROM sqlite_master WHERE type='table';"
327
+ return [r[0] for r in self.execute_str(query).fetchall()]
328
+
306
329
  def create_table(self, table: "Table", if_not_exists: bool = True) -> None:
307
330
  self.execute(CreateTable(table, if_not_exists=if_not_exists))
308
331
 
@@ -321,6 +344,8 @@ class SQLiteMetastore(AbstractDBMetastore):
321
344
  This is currently used for the local cli.
322
345
  """
323
346
 
347
+ META_TABLE = "meta"
348
+
324
349
  db: "SQLiteDatabaseEngine"
325
350
 
326
351
  def __init__(
@@ -342,7 +367,11 @@ class SQLiteMetastore(AbstractDBMetastore):
342
367
 
343
368
  self.db = db or SQLiteDatabaseEngine.from_db_file(db_file)
344
369
 
370
+ self._init_meta_table()
371
+ self._init_meta_schema_value()
372
+ self._check_schema_version()
345
373
  self._init_tables()
374
+ self._init_namespaces_projects()
346
375
 
347
376
  def __exit__(self, exc_type, exc_value, traceback) -> None:
348
377
  """Close connection upon exit from context manager."""
@@ -383,8 +412,44 @@ class SQLiteMetastore(AbstractDBMetastore):
383
412
  (db_class, db_args, db_kwargs) = db_clone_params
384
413
  return cls(uri=uri, db=db_class(*db_args, **db_kwargs))
385
414
 
415
+ @cached_property
416
+ def _meta(self) -> Table:
417
+ return Table(self.META_TABLE, self.db.metadata, *self._meta_columns())
418
+
419
+ def _meta_select(self, *columns) -> "Select":
420
+ if not columns:
421
+ return self._meta.select()
422
+ return select(*columns)
423
+
424
+ def _meta_insert(self) -> "Insert":
425
+ return sqlite.insert(self._meta)
426
+
427
+ def _init_meta_table(self) -> None:
428
+ """Initializes meta table"""
429
+ # NOTE! needs to be called before _init_tables()
430
+ table_names = self.db.table_names
431
+ if table_names and self.META_TABLE not in table_names:
432
+ # this will happen on first run
433
+ raise OutdatedDatabaseSchemaError(OUTDATED_SCHEMA_ERROR_MESSAGE)
434
+
435
+ self.db.create_table(self._meta, if_not_exists=True)
436
+ self.default_table_names.append(self._meta.name)
437
+
438
+ def _init_meta_schema_value(self) -> None:
439
+ """Inserts current schema version value if not present in meta table yet"""
440
+ stmt = (
441
+ self._meta_insert()
442
+ .values(id=1, schema_version=SCHEMA_VERSION)
443
+ .on_conflict_do_nothing(index_elements=["id"])
444
+ )
445
+ self.db.execute(stmt)
446
+
386
447
  def _init_tables(self) -> None:
387
448
  """Initialize tables."""
449
+ self.db.create_table(self._namespaces, if_not_exists=True)
450
+ self.default_table_names.append(self._namespaces.name)
451
+ self.db.create_table(self._projects, if_not_exists=True)
452
+ self.default_table_names.append(self._projects.name)
388
453
  self.db.create_table(self._datasets, if_not_exists=True)
389
454
  self.default_table_names.append(self._datasets.name)
390
455
  self.db.create_table(self._datasets_versions, if_not_exists=True)
@@ -394,10 +459,55 @@ class SQLiteMetastore(AbstractDBMetastore):
394
459
  self.db.create_table(self._jobs, if_not_exists=True)
395
460
  self.default_table_names.append(self._jobs.name)
396
461
 
462
+ def _init_namespaces_projects(self) -> None:
463
+ """
464
+ Creates local namespace and local project connected to it.
465
+ In local environment user cannot explicitly create other namespaces and
466
+ projects and all datasets user creates will be stored in those.
467
+ When pulling dataset from Studio, then other namespaces and projects will
468
+ be created implicitly though, to keep the same fully qualified name with
469
+ Studio dataset.
470
+ """
471
+ system_namespace = self.create_namespace(Namespace.system(), "System namespace")
472
+ self.create_project(Project.listing(), system_namespace.name, "Listing project")
473
+
474
+ local_namespace = self.create_namespace(Namespace.default(), "Local namespace")
475
+ self.create_project(Project.default(), local_namespace.name, "Local project")
476
+
477
+ def _check_schema_version(self) -> None:
478
+ """
479
+ Checks if current DB schema is up to date with latest DB model and schema
480
+ version. If not, OutdatedDatabaseSchemaError is raised.
481
+ """
482
+ schema_version = next(self.db.execute(self._meta_select()))[1]
483
+ if schema_version < SCHEMA_VERSION:
484
+ raise OutdatedDatabaseSchemaError(OUTDATED_SCHEMA_ERROR_MESSAGE)
485
+
486
+ #
487
+ # Dataset dependencies
488
+ #
489
+ @classmethod
490
+ def _meta_columns(cls) -> list["SchemaItem"]:
491
+ return [
492
+ Column("id", Integer, primary_key=True),
493
+ Column("schema_version", Integer, default=SCHEMA_VERSION),
494
+ ]
495
+
397
496
  @classmethod
398
497
  def _datasets_columns(cls) -> list["SchemaItem"]:
399
498
  """Datasets table columns."""
400
- return [*super()._datasets_columns(), UniqueConstraint("name")]
499
+ return [*super()._datasets_columns(), UniqueConstraint("project_id", "name")]
500
+
501
+ @classmethod
502
+ def _namespaces_columns(cls) -> list["SchemaItem"]:
503
+ """Datasets table columns."""
504
+ return [*super()._namespaces_columns(), UniqueConstraint("name")]
505
+
506
+ def _namespaces_insert(self) -> "Insert":
507
+ return sqlite.insert(self._namespaces)
508
+
509
+ def _projects_insert(self) -> "Insert":
510
+ return sqlite.insert(self._projects)
401
511
 
402
512
  def _datasets_insert(self) -> "Insert":
403
513
  return sqlite.insert(self._datasets)
@@ -414,6 +524,8 @@ class SQLiteMetastore(AbstractDBMetastore):
414
524
 
415
525
  def _dataset_dependencies_select_columns(self) -> list["SchemaItem"]:
416
526
  return [
527
+ self._namespaces.c.name,
528
+ self._projects.c.name,
417
529
  self._datasets_dependencies.c.id,
418
530
  self._datasets_dependencies.c.dataset_id,
419
531
  self._datasets_dependencies.c.dataset_version_id,
@@ -429,6 +541,26 @@ class SQLiteMetastore(AbstractDBMetastore):
429
541
  def _jobs_insert(self) -> "Insert":
430
542
  return sqlite.insert(self._jobs)
431
543
 
544
+ @property
545
+ def is_studio(self) -> bool:
546
+ return False
547
+
548
+ #
549
+ # Namespaces
550
+ #
551
+
552
+ @property
553
+ def default_namespace_name(self):
554
+ return Namespace.default()
555
+
556
+ #
557
+ # Projects
558
+ #
559
+
560
+ @property
561
+ def default_project_name(self):
562
+ return Project.default()
563
+
432
564
 
433
565
  class SQLiteWarehouse(AbstractWarehouse):
434
566
  """
@@ -534,16 +666,16 @@ class SQLiteWarehouse(AbstractWarehouse):
534
666
  ) -> None:
535
667
  dst_empty = False
536
668
 
537
- if not self.db.has_table(self.dataset_table_name(src.name, src_version)):
669
+ if not self.db.has_table(self.dataset_table_name(src, src_version)):
538
670
  # source table doesn't exist, nothing to do
539
671
  return
540
672
 
541
673
  src_dr = self.dataset_rows(src, src_version).table
542
674
 
543
- if not self.db.has_table(self.dataset_table_name(dst.name, dst_version)):
675
+ if not self.db.has_table(self.dataset_table_name(dst, dst_version)):
544
676
  # destination table doesn't exist, create it
545
677
  self.create_dataset_rows_table(
546
- self.dataset_table_name(dst.name, dst_version),
678
+ self.dataset_table_name(dst, dst_version),
547
679
  columns=src_dr.columns,
548
680
  )
549
681
  dst_empty = True
@@ -182,7 +182,7 @@ class AbstractWarehouse(ABC, Serializable):
182
182
  ):
183
183
  version = version or dataset.latest_version
184
184
 
185
- table_name = self.dataset_table_name(dataset.name, version)
185
+ table_name = self.dataset_table_name(dataset, version)
186
186
  return self.schema.dataset_row_cls(
187
187
  table_name,
188
188
  self.db,
@@ -254,12 +254,24 @@ class AbstractWarehouse(ABC, Serializable):
254
254
  name = parsed.path if parsed.scheme == "file" else parsed.netloc
255
255
  return parsed.scheme, name
256
256
 
257
- def dataset_table_name(self, dataset_name: str, version: str) -> str:
257
+ def dataset_table_name(self, dataset: DatasetRecord, version: str) -> str:
258
+ return self._construct_dataset_table_name(
259
+ dataset.project.namespace.name,
260
+ dataset.project.name,
261
+ dataset.name,
262
+ version,
263
+ )
264
+
265
+ def _construct_dataset_table_name(
266
+ self, namespace: str, project: str, dataset_name: str, version: str
267
+ ) -> str:
258
268
  prefix = self.DATASET_TABLE_PREFIX
259
269
  if Client.is_data_source_uri(dataset_name):
260
270
  # for datasets that are created for bucket listing we use different prefix
261
271
  prefix = self.DATASET_SOURCE_TABLE_PREFIX
262
- return f"{prefix}{dataset_name}_{version.replace('.', '_')}"
272
+ return (
273
+ f"{prefix}{namespace}_{project}_{dataset_name}_{version.replace('.', '_')}"
274
+ )
263
275
 
264
276
  def temp_table_name(self) -> str:
265
277
  return self.TMP_TABLE_NAME_PREFIX + _random_string(6)
@@ -287,7 +299,7 @@ class AbstractWarehouse(ABC, Serializable):
287
299
  if_exists: bool = True,
288
300
  ) -> None:
289
301
  """Drops a dataset rows table for the given dataset name."""
290
- table_name = self.dataset_table_name(dataset.name, version)
302
+ table_name = self.dataset_table_name(dataset, version)
291
303
  table = sa.Table(table_name, self.db.metadata)
292
304
  self.db.drop_table(table, if_exists=if_exists)
293
305
 
@@ -344,13 +356,20 @@ class AbstractWarehouse(ABC, Serializable):
344
356
 
345
357
  def rename_dataset_table(
346
358
  self,
359
+ dataset: DatasetRecord,
347
360
  old_name: str,
348
361
  new_name: str,
349
362
  old_version: str,
350
363
  new_version: str,
351
364
  ) -> None:
352
- old_ds_table_name = self.dataset_table_name(old_name, old_version)
353
- new_ds_table_name = self.dataset_table_name(new_name, new_version)
365
+ namespace = dataset.project.namespace.name
366
+ project = dataset.project.name
367
+ old_ds_table_name = self._construct_dataset_table_name(
368
+ namespace, project, old_name, old_version
369
+ )
370
+ new_ds_table_name = self._construct_dataset_table_name(
371
+ namespace, project, new_name, new_version
372
+ )
354
373
 
355
374
  self.db.rename_table(old_ds_table_name, new_ds_table_name)
356
375
 
@@ -368,7 +387,7 @@ class AbstractWarehouse(ABC, Serializable):
368
387
  """
369
388
  Returns tuple with dataset stats: total number of rows and total dataset size.
370
389
  """
371
- if not (self.db.has_table(self.dataset_table_name(dataset.name, version))):
390
+ if not (self.db.has_table(self.dataset_table_name(dataset, version))):
372
391
  return None, None
373
392
 
374
393
  file_signals = list(
datachain/dataset.py CHANGED
@@ -13,7 +13,9 @@ from typing import (
13
13
  from urllib.parse import urlparse
14
14
 
15
15
  from datachain import semver
16
- from datachain.error import DatasetVersionNotFoundError
16
+ from datachain.error import DatasetVersionNotFoundError, InvalidDatasetNameError
17
+ from datachain.namespace import Namespace
18
+ from datachain.project import Project
17
19
  from datachain.sql.types import NAME_TYPES_MAPPING, SQLType
18
20
 
19
21
  T = TypeVar("T", bound="DatasetRecord")
@@ -27,6 +29,8 @@ QUERY_DATASET_PREFIX = "ds_query_"
27
29
  LISTING_PREFIX = "lst__"
28
30
 
29
31
  DEFAULT_DATASET_VERSION = "1.0.0"
32
+ DATASET_NAME_RESERVED_CHARS = ["."]
33
+ DATASET_NAME_REPLACEMENT_CHAR = "_"
30
34
 
31
35
 
32
36
  # StorageURI represents a normalised URI to a valid storage location (full bucket or
@@ -57,20 +61,34 @@ def parse_dataset_uri(uri: str) -> tuple[str, Optional[str]]:
57
61
  return name, s[1]
58
62
 
59
63
 
60
- def create_dataset_uri(name: str, version: Optional[str] = None) -> str:
64
+ def create_dataset_uri(
65
+ name: str, namespace: str, project: str, version: Optional[str] = None
66
+ ) -> str:
61
67
  """
62
- Creates a dataset uri based on dataset name and optionally version
68
+ Creates a dataset uri based on namespace, project, dataset name and optionally
69
+ version.
63
70
  Example:
64
- Input: zalando, 3.0.1
65
- Output: ds//zalando@v3.0.1
71
+ Input: dev, clothes, zalando, 3.0.1
72
+ Output: ds//dev.clothes.zalando@v3.0.1
66
73
  """
67
- uri = f"{DATASET_PREFIX}{name}"
74
+ uri = f"{DATASET_PREFIX}{namespace}.{project}.{name}"
68
75
  if version:
69
76
  uri += f"@v{version}"
70
77
 
71
78
  return uri
72
79
 
73
80
 
81
+ def parse_dataset_name(name: str) -> tuple[Optional[str], Optional[str], str]:
82
+ """Parses dataset name and returns namespace, project and name"""
83
+ if not name:
84
+ raise ValueError("Name must be defined to parse it")
85
+ split = name.split(".")
86
+ if len(split) == 3:
87
+ return tuple(split) # type: ignore[return-value]
88
+
89
+ return None, None, name
90
+
91
+
74
92
  class DatasetDependencyType:
75
93
  DATASET = "dataset"
76
94
  STORAGE = "storage"
@@ -78,8 +96,12 @@ class DatasetDependencyType:
78
96
 
79
97
  @dataclass
80
98
  class DatasetDependency:
99
+ # TODO put `DatasetRecord` instead of name + version which will
100
+ # simplify codebase in various places
81
101
  id: int
82
102
  type: str
103
+ namespace: str
104
+ project: str
83
105
  name: str
84
106
  version: str
85
107
  created_at: datetime
@@ -100,6 +122,8 @@ class DatasetDependency:
100
122
  @classmethod
101
123
  def parse(
102
124
  cls: builtins.type[DD],
125
+ namespace_name: str,
126
+ project_name: str,
103
127
  id: int,
104
128
  dataset_id: Optional[int],
105
129
  dataset_version_id: Optional[int],
@@ -121,6 +145,8 @@ class DatasetDependency:
121
145
  if is_listing_dataset(dataset_name)
122
146
  else DatasetDependencyType.DATASET
123
147
  ),
148
+ namespace_name,
149
+ project_name,
124
150
  dataset_name,
125
151
  (
126
152
  dataset_version # type: ignore[arg-type]
@@ -335,6 +361,7 @@ class DatasetListVersion:
335
361
  class DatasetRecord:
336
362
  id: int
337
363
  name: str
364
+ project: Project
338
365
  description: Optional[str]
339
366
  attrs: list[str]
340
367
  schema: dict[str, Union[SQLType, type[SQLType]]]
@@ -349,6 +376,9 @@ class DatasetRecord:
349
376
  sources: str = ""
350
377
  query_script: str = ""
351
378
 
379
+ def __hash__(self):
380
+ return hash(f"{self.id}")
381
+
352
382
  @staticmethod
353
383
  def parse_schema(
354
384
  ct: dict[str, Any],
@@ -358,10 +388,31 @@ class DatasetRecord:
358
388
  for c_name, c_type in ct.items()
359
389
  }
360
390
 
391
+ @staticmethod
392
+ def validate_name(name: str) -> None:
393
+ """Throws exception if name has reserved characters"""
394
+ for c in DATASET_NAME_RESERVED_CHARS:
395
+ if c in name:
396
+ raise InvalidDatasetNameError(
397
+ f"Character {c} is reserved and not allowed in dataset name"
398
+ )
399
+
361
400
  @classmethod
362
401
  def parse( # noqa: PLR0913
363
402
  cls,
364
- id: int,
403
+ namespace_id: int,
404
+ namespace_uuid: str,
405
+ namespace_name: str,
406
+ namespace_description: Optional[str],
407
+ namespace_created_at: datetime,
408
+ project_id: int,
409
+ project_uuid: str,
410
+ project_name: str,
411
+ project_description: Optional[str],
412
+ project_created_at: datetime,
413
+ project_namespace_id: int,
414
+ dataset_id: int,
415
+ dataset_project_id: int,
365
416
  name: str,
366
417
  description: Optional[str],
367
418
  attrs: str,
@@ -400,6 +451,23 @@ class DatasetRecord:
400
451
  json.loads(version_schema) if version_schema else {}
401
452
  )
402
453
 
454
+ namespace = Namespace(
455
+ namespace_id,
456
+ namespace_uuid,
457
+ namespace_name,
458
+ namespace_description,
459
+ namespace_created_at,
460
+ )
461
+
462
+ project = Project(
463
+ project_id,
464
+ project_uuid,
465
+ project_name,
466
+ project_description,
467
+ project_created_at,
468
+ namespace,
469
+ )
470
+
403
471
  dataset_version = DatasetVersion.parse(
404
472
  version_id,
405
473
  version_uuid,
@@ -422,8 +490,9 @@ class DatasetRecord:
422
490
  )
423
491
 
424
492
  return cls(
425
- id,
493
+ dataset_id,
426
494
  name,
495
+ project,
427
496
  description,
428
497
  attrs_lst,
429
498
  cls.parse_schema(schema_dct), # type: ignore[arg-type]
@@ -448,6 +517,10 @@ class DatasetRecord:
448
517
  for c_name, c_type in self.schema.items()
449
518
  }
450
519
 
520
+ @property
521
+ def full_name(self) -> str:
522
+ return f"{self.project.namespace.name}.{self.project.name}.{self.name}"
523
+
451
524
  def get_schema(self, version: str) -> dict[str, Union[SQLType, type[SQLType]]]:
452
525
  return self.get_version(version).schema if version else self.schema
453
526
 
@@ -527,7 +600,10 @@ class DatasetRecord:
527
600
  Dataset uri example: ds://dogs@v3.0.1
528
601
  """
529
602
  identifier = self.identifier(version)
530
- return f"{DATASET_PREFIX}{identifier}"
603
+ return (
604
+ f"{DATASET_PREFIX}{self.project.namespace.name}"
605
+ f".{self.project.name}.{identifier}"
606
+ )
531
607
 
532
608
  @property
533
609
  def next_version_major(self) -> str:
@@ -592,15 +668,17 @@ class DatasetRecord:
592
668
 
593
669
  @classmethod
594
670
  def from_dict(cls, d: dict[str, Any]) -> "DatasetRecord":
671
+ project = Project.from_dict(d.pop("project"))
595
672
  versions = [DatasetVersion.from_dict(v) for v in d.pop("versions", [])]
596
673
  kwargs = {f.name: d[f.name] for f in fields(cls) if f.name in d}
597
- return cls(**kwargs, versions=versions)
674
+ return cls(**kwargs, versions=versions, project=project)
598
675
 
599
676
 
600
677
  @dataclass
601
678
  class DatasetListRecord:
602
679
  id: int
603
680
  name: str
681
+ project: Project
604
682
  description: Optional[str]
605
683
  attrs: list[str]
606
684
  versions: list[DatasetListVersion]
@@ -609,7 +687,18 @@ class DatasetListRecord:
609
687
  @classmethod
610
688
  def parse( # noqa: PLR0913
611
689
  cls,
612
- id: int,
690
+ namespace_id: int,
691
+ namespace_uuid: str,
692
+ namespace_name: str,
693
+ namespace_description: Optional[str],
694
+ namespace_created_at: datetime,
695
+ project_id: int,
696
+ project_uuid: str,
697
+ project_name: str,
698
+ project_description: Optional[str],
699
+ project_created_at: datetime,
700
+ project_namespace_id: int,
701
+ dataset_id: int,
613
702
  name: str,
614
703
  description: Optional[str],
615
704
  attrs: str,
@@ -630,6 +719,23 @@ class DatasetListRecord:
630
719
  ) -> "DatasetListRecord":
631
720
  attrs_lst: list[str] = json.loads(attrs) if attrs else []
632
721
 
722
+ namespace = Namespace(
723
+ namespace_id,
724
+ namespace_uuid,
725
+ namespace_name,
726
+ namespace_description,
727
+ namespace_created_at,
728
+ )
729
+
730
+ project = Project(
731
+ project_id,
732
+ project_uuid,
733
+ project_name,
734
+ project_description,
735
+ project_created_at,
736
+ namespace,
737
+ )
738
+
633
739
  dataset_version = DatasetListVersion.parse(
634
740
  version_id,
635
741
  version_uuid,
@@ -647,14 +753,19 @@ class DatasetListRecord:
647
753
  )
648
754
 
649
755
  return cls(
650
- id,
756
+ dataset_id,
651
757
  name,
758
+ project,
652
759
  description,
653
760
  attrs_lst,
654
761
  [dataset_version],
655
762
  created_at,
656
763
  )
657
764
 
765
+ @property
766
+ def full_name(self) -> str:
767
+ return f"{self.project.namespace.name}.{self.project.name}.{self.name}"
768
+
658
769
  def merge_versions(self, other: "DatasetListRecord") -> "DatasetListRecord":
659
770
  """Merge versions from another dataset"""
660
771
  if other.id != self.id:
@@ -691,9 +802,11 @@ class DatasetListRecord:
691
802
 
692
803
  @classmethod
693
804
  def from_dict(cls, d: dict[str, Any]) -> "DatasetListRecord":
805
+ project = Project.from_dict(d.pop("project"))
694
806
  versions = [DatasetListVersion.parse(**v) for v in d.get("versions", [])]
695
807
  kwargs = {f.name: d[f.name] for f in fields(cls) if f.name in d}
696
808
  kwargs["versions"] = versions
809
+ kwargs["project"] = project
697
810
  return cls(**kwargs)
698
811
 
699
812
 
datachain/delta.py CHANGED
@@ -56,8 +56,10 @@ def _get_delta_chain(
56
56
  compare: Optional[Union[str, Sequence[str]]] = None,
57
57
  ) -> "DataChain":
58
58
  """Get delta chain for processing changes between versions."""
59
- source_dc = datachain.read_dataset(source_ds_name, source_ds_version)
60
- source_dc_latest = datachain.read_dataset(source_ds_name, source_ds_latest_version)
59
+ source_dc = datachain.read_dataset(source_ds_name, version=source_ds_version)
60
+ source_dc_latest = datachain.read_dataset(
61
+ source_ds_name, version=source_ds_latest_version
62
+ )
61
63
 
62
64
  # Calculate diff between source versions
63
65
  return source_dc_latest.compare(source_dc, on=on, compare=compare, deleted=False)
@@ -79,8 +81,10 @@ def _get_retry_chain(
79
81
  retry_chain = None
80
82
 
81
83
  # Read the latest version of the result dataset for retry logic
82
- result_dataset = datachain.read_dataset(name, latest_version)
83
- source_dc_latest = datachain.read_dataset(source_ds_name, source_ds_latest_version)
84
+ result_dataset = datachain.read_dataset(name, version=latest_version)
85
+ source_dc_latest = datachain.read_dataset(
86
+ source_ds_name, version=source_ds_latest_version
87
+ )
84
88
 
85
89
  # Handle error records if delta_retry is a string (column name)
86
90
  if isinstance(delta_retry, str):
@@ -232,7 +236,7 @@ def delta_retry_update(
232
236
  if processing_chain is None or (processing_chain and processing_chain.empty):
233
237
  return None, None, False
234
238
 
235
- latest_dataset = datachain.read_dataset(name, latest_version)
239
+ latest_dataset = datachain.read_dataset(name, version=latest_version)
236
240
  compared_chain = latest_dataset.compare(
237
241
  processing_chain,
238
242
  on=right_on or on,
datachain/error.py CHANGED
@@ -2,10 +2,42 @@ class DataChainError(RuntimeError):
2
2
  pass
3
3
 
4
4
 
5
+ class InvalidDatasetNameError(RuntimeError):
6
+ pass
7
+
8
+
9
+ class InvalidNamespaceNameError(RuntimeError):
10
+ pass
11
+
12
+
13
+ class InvalidProjectNameError(RuntimeError):
14
+ pass
15
+
16
+
5
17
  class NotFoundError(Exception):
6
18
  pass
7
19
 
8
20
 
21
+ class NamespaceNotFoundError(NotFoundError):
22
+ pass
23
+
24
+
25
+ class NotAllowedError(Exception):
26
+ pass
27
+
28
+
29
+ class NamespaceCreateNotAllowedError(NotAllowedError):
30
+ pass
31
+
32
+
33
+ class ProjectCreateNotAllowedError(NotAllowedError):
34
+ pass
35
+
36
+
37
+ class ProjectNotFoundError(NotFoundError):
38
+ pass
39
+
40
+
9
41
  class DatasetNotFoundError(NotFoundError):
10
42
  pass
11
43
 
@@ -53,3 +85,7 @@ class ClientError(RuntimeError):
53
85
 
54
86
  class TableMissingError(DataChainError):
55
87
  pass
88
+
89
+
90
+ class OutdatedDatabaseSchemaError(DataChainError):
91
+ pass