datachain 0.20.3__py3-none-any.whl → 0.21.0__py3-none-any.whl

This diff compares publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between versions as they appear in their respective public registries.

Potentially problematic release.

This version of datachain might be problematic.

Files changed (47)
  1. datachain/__init__.py +0 -2
  2. datachain/cache.py +2 -2
  3. datachain/catalog/catalog.py +65 -180
  4. datachain/cli/__init__.py +7 -0
  5. datachain/cli/commands/datasets.py +28 -43
  6. datachain/cli/commands/ls.py +2 -2
  7. datachain/cli/parser/__init__.py +35 -1
  8. datachain/client/fsspec.py +3 -5
  9. datachain/client/hf.py +0 -10
  10. datachain/client/local.py +4 -4
  11. datachain/data_storage/metastore.py +37 -403
  12. datachain/data_storage/sqlite.py +7 -139
  13. datachain/data_storage/warehouse.py +7 -26
  14. datachain/dataset.py +12 -126
  15. datachain/delta.py +7 -11
  16. datachain/error.py +0 -36
  17. datachain/func/func.py +1 -1
  18. datachain/lib/arrow.py +3 -3
  19. datachain/lib/dataset_info.py +0 -4
  20. datachain/lib/dc/datachain.py +92 -259
  21. datachain/lib/dc/datasets.py +49 -87
  22. datachain/lib/dc/listings.py +3 -3
  23. datachain/lib/dc/records.py +0 -1
  24. datachain/lib/dc/storage.py +40 -38
  25. datachain/lib/file.py +23 -77
  26. datachain/lib/listing.py +1 -3
  27. datachain/lib/meta_formats.py +1 -1
  28. datachain/lib/pytorch.py +1 -1
  29. datachain/lib/settings.py +0 -10
  30. datachain/lib/tar.py +2 -1
  31. datachain/lib/udf_signature.py +1 -1
  32. datachain/lib/webdataset.py +20 -30
  33. datachain/listing.py +1 -3
  34. datachain/query/dataset.py +46 -71
  35. datachain/query/session.py +1 -1
  36. datachain/remote/studio.py +26 -61
  37. datachain/studio.py +7 -23
  38. {datachain-0.20.3.dist-info → datachain-0.21.0.dist-info}/METADATA +2 -2
  39. {datachain-0.20.3.dist-info → datachain-0.21.0.dist-info}/RECORD +43 -47
  40. datachain/lib/namespaces.py +0 -71
  41. datachain/lib/projects.py +0 -86
  42. datachain/namespace.py +0 -65
  43. datachain/project.py +0 -78
  44. {datachain-0.20.3.dist-info → datachain-0.21.0.dist-info}/WHEEL +0 -0
  45. {datachain-0.20.3.dist-info → datachain-0.21.0.dist-info}/entry_points.txt +0 -0
  46. {datachain-0.20.3.dist-info → datachain-0.21.0.dist-info}/licenses/LICENSE +0 -0
  47. {datachain-0.20.3.dist-info → datachain-0.21.0.dist-info}/top_level.txt +0 -0
datachain/data_storage/sqlite.py CHANGED
@@ -3,7 +3,7 @@ import os
 import sqlite3
 from collections.abc import Iterable, Sequence
 from contextlib import contextmanager
-from functools import cached_property, wraps
+from functools import wraps
 from time import sleep
 from typing import (
     TYPE_CHECKING,
@@ -15,15 +15,7 @@ from typing import (
 )
 
 import sqlalchemy
-from sqlalchemy import (
-    Column,
-    Integer,
-    MetaData,
-    Table,
-    UniqueConstraint,
-    exists,
-    select,
-)
+from sqlalchemy import MetaData, Table, UniqueConstraint, exists, select
 from sqlalchemy.dialects import sqlite
 from sqlalchemy.schema import CreateIndex, CreateTable, DropTable
 from sqlalchemy.sql import func
@@ -38,9 +30,7 @@ from datachain.data_storage import AbstractDBMetastore, AbstractWarehouse
 from datachain.data_storage.db_engine import DatabaseEngine
 from datachain.data_storage.schema import DefaultSchema
 from datachain.dataset import DatasetRecord, StorageURI
-from datachain.error import DataChainError, OutdatedDatabaseSchemaError
-from datachain.namespace import Namespace
-from datachain.project import Project
+from datachain.error import DataChainError
 from datachain.sql.sqlite import create_user_defined_sql_functions, sqlite_dialect
 from datachain.sql.sqlite.base import load_usearch_extension
 from datachain.sql.types import SQLType
@@ -70,14 +60,6 @@ datachain.sql.sqlite.setup()
 quote_schema = sqlite_dialect.identifier_preparer.quote_schema
 quote = sqlite_dialect.identifier_preparer.quote
 
-# NOTE! This should be manually increased when we change our DB schema in codebase
-SCHEMA_VERSION = 1
-
-OUTDATED_SCHEMA_ERROR_MESSAGE = (
-    "You have an old version of the database schema. Please refer to the documentation"
-    " for more information."
-)
-
 
 def _get_in_memory_uri():
     return "file::memory:?cache=shared"
@@ -321,11 +303,6 @@ class SQLiteDatabaseEngine(DatabaseEngine):
         )
         return bool(next(self.execute(query))[0])
 
-    @property
-    def table_names(self) -> list[str]:
-        query = "SELECT name FROM sqlite_master WHERE type='table';"
-        return [r[0] for r in self.execute_str(query).fetchall()]
-
     def create_table(self, table: "Table", if_not_exists: bool = True) -> None:
         self.execute(CreateTable(table, if_not_exists=if_not_exists))
@@ -344,8 +321,6 @@ class SQLiteMetastore(AbstractDBMetastore):
     This is currently used for the local cli.
     """
 
-    META_TABLE = "meta"
-
     db: "SQLiteDatabaseEngine"
 
     def __init__(
@@ -367,11 +342,7 @@ class SQLiteMetastore(AbstractDBMetastore):
 
         self.db = db or SQLiteDatabaseEngine.from_db_file(db_file)
 
-        self._init_meta_table()
-        self._init_meta_schema_value()
-        self._check_schema_version()
         self._init_tables()
-        self._init_namespaces_projects()
 
     def __exit__(self, exc_type, exc_value, traceback) -> None:
         """Close connection upon exit from context manager."""
@@ -412,44 +383,8 @@ class SQLiteMetastore(AbstractDBMetastore):
         (db_class, db_args, db_kwargs) = db_clone_params
         return cls(uri=uri, db=db_class(*db_args, **db_kwargs))
 
-    @cached_property
-    def _meta(self) -> Table:
-        return Table(self.META_TABLE, self.db.metadata, *self._meta_columns())
-
-    def _meta_select(self, *columns) -> "Select":
-        if not columns:
-            return self._meta.select()
-        return select(*columns)
-
-    def _meta_insert(self) -> "Insert":
-        return sqlite.insert(self._meta)
-
-    def _init_meta_table(self) -> None:
-        """Initializes meta table"""
-        # NOTE! needs to be called before _init_tables()
-        table_names = self.db.table_names
-        if table_names and self.META_TABLE not in table_names:
-            # this will happen on first run
-            raise OutdatedDatabaseSchemaError(OUTDATED_SCHEMA_ERROR_MESSAGE)
-
-        self.db.create_table(self._meta, if_not_exists=True)
-        self.default_table_names.append(self._meta.name)
-
-    def _init_meta_schema_value(self) -> None:
-        """Inserts current schema version value if not present in meta table yet"""
-        stmt = (
-            self._meta_insert()
-            .values(id=1, schema_version=SCHEMA_VERSION)
-            .on_conflict_do_nothing(index_elements=["id"])
-        )
-        self.db.execute(stmt)
-
     def _init_tables(self) -> None:
         """Initialize tables."""
-        self.db.create_table(self._namespaces, if_not_exists=True)
-        self.default_table_names.append(self._namespaces.name)
-        self.db.create_table(self._projects, if_not_exists=True)
-        self.default_table_names.append(self._projects.name)
         self.db.create_table(self._datasets, if_not_exists=True)
         self.default_table_names.append(self._datasets.name)
         self.db.create_table(self._datasets_versions, if_not_exists=True)
@@ -459,55 +394,10 @@ class SQLiteMetastore(AbstractDBMetastore):
         self.db.create_table(self._jobs, if_not_exists=True)
         self.default_table_names.append(self._jobs.name)
 
-    def _init_namespaces_projects(self) -> None:
-        """
-        Creates local namespace and local project connected to it.
-        In local environment user cannot explicitly create other namespaces and
-        projects and all datasets user creates will be stored in those.
-        When pulling dataset from Studio, then other namespaces and projects will
-        be created implicitly though, to keep the same fully qualified name with
-        Studio dataset.
-        """
-        system_namespace = self.create_namespace(Namespace.system(), "System namespace")
-        self.create_project(system_namespace.name, Project.listing(), "Listing project")
-
-        local_namespace = self.create_namespace(Namespace.default(), "Local namespace")
-        self.create_project(local_namespace.name, Project.default(), "Local project")
-
-    def _check_schema_version(self) -> None:
-        """
-        Checks if current DB schema is up to date with latest DB model and schema
-        version. If not, OutdatedDatabaseSchemaError is raised.
-        """
-        schema_version = next(self.db.execute(self._meta_select()))[1]
-        if schema_version < SCHEMA_VERSION:
-            raise OutdatedDatabaseSchemaError(OUTDATED_SCHEMA_ERROR_MESSAGE)
-
-    #
-    # Dataset dependencies
-    #
-    @classmethod
-    def _meta_columns(cls) -> list["SchemaItem"]:
-        return [
-            Column("id", Integer, primary_key=True),
-            Column("schema_version", Integer, default=SCHEMA_VERSION),
-        ]
-
     @classmethod
     def _datasets_columns(cls) -> list["SchemaItem"]:
         """Datasets table columns."""
-        return [*super()._datasets_columns(), UniqueConstraint("project_id", "name")]
-
-    @classmethod
-    def _namespaces_columns(cls) -> list["SchemaItem"]:
-        """Datasets table columns."""
-        return [*super()._namespaces_columns(), UniqueConstraint("name")]
-
-    def _namespaces_insert(self) -> "Insert":
-        return sqlite.insert(self._namespaces)
-
-    def _projects_insert(self) -> "Insert":
-        return sqlite.insert(self._projects)
+        return [*super()._datasets_columns(), UniqueConstraint("name")]
 
     def _datasets_insert(self) -> "Insert":
         return sqlite.insert(self._datasets)
@@ -524,8 +414,6 @@ class SQLiteMetastore(AbstractDBMetastore):
 
     def _dataset_dependencies_select_columns(self) -> list["SchemaItem"]:
         return [
-            self._namespaces.c.name,
-            self._projects.c.name,
             self._datasets_dependencies.c.id,
             self._datasets_dependencies.c.dataset_id,
             self._datasets_dependencies.c.dataset_version_id,
@@ -541,26 +429,6 @@ class SQLiteMetastore(AbstractDBMetastore):
     def _jobs_insert(self) -> "Insert":
         return sqlite.insert(self._jobs)
 
-    @property
-    def is_studio(self) -> bool:
-        return False
-
-    #
-    # Namespaces
-    #
-
-    @property
-    def default_namespace_name(self):
-        return Namespace.default()
-
-    #
-    # Projects
-    #
-
-    @property
-    def default_project_name(self):
-        return Project.default()
-
 
 class SQLiteWarehouse(AbstractWarehouse):
     """
@@ -666,16 +534,16 @@ class SQLiteWarehouse(AbstractWarehouse):
     ) -> None:
         dst_empty = False
 
-        if not self.db.has_table(self.dataset_table_name(src, src_version)):
+        if not self.db.has_table(self.dataset_table_name(src.name, src_version)):
             # source table doesn't exist, nothing to do
             return
 
         src_dr = self.dataset_rows(src, src_version).table
 
-        if not self.db.has_table(self.dataset_table_name(dst, dst_version)):
+        if not self.db.has_table(self.dataset_table_name(dst.name, dst_version)):
             # destination table doesn't exist, create it
             self.create_dataset_rows_table(
-                self.dataset_table_name(dst, dst_version),
+                self.dataset_table_name(dst.name, dst_version),
                 columns=src_dr.columns,
             )
             dst_empty = True
datachain/data_storage/warehouse.py CHANGED
@@ -182,7 +182,7 @@ class AbstractWarehouse(ABC, Serializable):
     ):
         version = version or dataset.latest_version
 
-        table_name = self.dataset_table_name(dataset, version)
+        table_name = self.dataset_table_name(dataset.name, version)
         return self.schema.dataset_row_cls(
             table_name,
             self.db,
@@ -254,24 +254,12 @@ class AbstractWarehouse(ABC, Serializable):
         name = parsed.path if parsed.scheme == "file" else parsed.netloc
         return parsed.scheme, name
 
-    def dataset_table_name(self, dataset: DatasetRecord, version: str) -> str:
-        return self._construct_dataset_table_name(
-            dataset.project.namespace.name,
-            dataset.project.name,
-            dataset.name,
-            version,
-        )
-
-    def _construct_dataset_table_name(
-        self, namespace: str, project: str, dataset_name: str, version: str
-    ) -> str:
+    def dataset_table_name(self, dataset_name: str, version: str) -> str:
         prefix = self.DATASET_TABLE_PREFIX
         if Client.is_data_source_uri(dataset_name):
             # for datasets that are created for bucket listing we use different prefix
             prefix = self.DATASET_SOURCE_TABLE_PREFIX
-        return (
-            f"{prefix}{namespace}_{project}_{dataset_name}_{version.replace('.', '_')}"
-        )
+        return f"{prefix}{dataset_name}_{version.replace('.', '_')}"
 
     def temp_table_name(self) -> str:
         return self.TMP_TABLE_NAME_PREFIX + _random_string(6)
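
With namespaces and projects gone, `dataset_table_name` keys row tables by dataset name and version alone. A minimal sketch of the naming change, assuming a `ds_` value for `DATASET_TABLE_PREFIX` (the listing-prefix branch is omitted; sample names are illustrative):

```python
# Illustrative only: assumes DATASET_TABLE_PREFIX == "ds_".
def table_name_0_20(namespace: str, project: str, name: str, version: str) -> str:
    # 0.20.x embedded namespace and project in the table name
    return f"ds_{namespace}_{project}_{name}_{version.replace('.', '_')}"


def table_name_0_21(name: str, version: str) -> str:
    # 0.21.0 keys tables by dataset name and version only
    return f"ds_{name}_{version.replace('.', '_')}"


assert table_name_0_20("local", "local", "zalando", "3.0.1") == "ds_local_local_zalando_3_0_1"
assert table_name_0_21("zalando", "3.0.1") == "ds_zalando_3_0_1"
```

Nothing shown in this diff renames tables created under the old scheme, so the two versions construct different names for the same dataset.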
@@ -299,7 +287,7 @@ class AbstractWarehouse(ABC, Serializable):
         if_exists: bool = True,
     ) -> None:
         """Drops a dataset rows table for the given dataset name."""
-        table_name = self.dataset_table_name(dataset, version)
+        table_name = self.dataset_table_name(dataset.name, version)
         table = sa.Table(table_name, self.db.metadata)
         self.db.drop_table(table, if_exists=if_exists)
@@ -356,20 +344,13 @@ class AbstractWarehouse(ABC, Serializable):
 
     def rename_dataset_table(
         self,
-        dataset: DatasetRecord,
         old_name: str,
         new_name: str,
         old_version: str,
         new_version: str,
     ) -> None:
-        namespace = dataset.project.namespace.name
-        project = dataset.project.name
-        old_ds_table_name = self._construct_dataset_table_name(
-            namespace, project, old_name, old_version
-        )
-        new_ds_table_name = self._construct_dataset_table_name(
-            namespace, project, new_name, new_version
-        )
+        old_ds_table_name = self.dataset_table_name(old_name, old_version)
+        new_ds_table_name = self.dataset_table_name(new_name, new_version)
 
         self.db.rename_table(old_ds_table_name, new_ds_table_name)
@@ -387,7 +368,7 @@ class AbstractWarehouse(ABC, Serializable):
         """
         Returns tuple with dataset stats: total number of rows and total dataset size.
         """
-        if not (self.db.has_table(self.dataset_table_name(dataset, version))):
+        if not (self.db.has_table(self.dataset_table_name(dataset.name, version))):
            return None, None
 
        file_signals = list(
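
`rename_dataset_table` likewise loses its `DatasetRecord` parameter: table names no longer need the dataset's project to be constructed. A sketch of the new call shape (`warehouse` stands for any `AbstractWarehouse` implementation; names and versions are placeholders):

```python
# Hypothetical call against the new signature shown above.
warehouse.rename_dataset_table(
    old_name="cats",
    new_name="dogs",
    old_version="1.0.0",
    new_version="1.0.1",
)
```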
datachain/dataset.py CHANGED
@@ -13,9 +13,7 @@ from typing import (
 from urllib.parse import urlparse
 
 from datachain import semver
-from datachain.error import DatasetVersionNotFoundError, InvalidDatasetNameError
-from datachain.namespace import Namespace
-from datachain.project import Project
+from datachain.error import DatasetVersionNotFoundError
 from datachain.sql.types import NAME_TYPES_MAPPING, SQLType
 
 T = TypeVar("T", bound="DatasetRecord")
@@ -29,8 +27,6 @@ QUERY_DATASET_PREFIX = "ds_query_"
 LISTING_PREFIX = "lst__"
 
 DEFAULT_DATASET_VERSION = "1.0.0"
-DATASET_NAME_RESERVED_CHARS = ["."]
-DATASET_NAME_REPLACEMENT_CHAR = "_"
 
 
 # StorageURI represents a normalised URI to a valid storage location (full bucket or
@@ -61,35 +57,20 @@ def parse_dataset_uri(uri: str) -> tuple[str, Optional[str]]:
     return name, s[1]
 
 
-def create_dataset_uri(
-    name: str, namespace: str, project: str, version: Optional[str] = None
-) -> str:
+def create_dataset_uri(name: str, version: Optional[str] = None) -> str:
     """
-    Creates a dataset uri based on namespace, project, dataset name and optionally
-    version.
+    Creates a dataset uri based on dataset name and optionally version
     Example:
-        Input: dev, clothes, zalando, 3.0.1
-        Output: ds://dev.clothes.zalando@v3.0.1
+        Input: zalando, 3.0.1
+        Output: ds://zalando@v3.0.1
     """
-    uri = f"{DATASET_PREFIX}{namespace}.{project}.{name}"
+    uri = f"{DATASET_PREFIX}{name}"
     if version:
         uri += f"@v{version}"
 
     return uri
 
 
-def parse_dataset_name(name: str) -> tuple[Optional[str], Optional[str], str]:
-    """Parses dataset name and returns namespace, project and name"""
-    if not name:
-        raise ValueError("Name must be defined to parse it")
-    split = name.split(".")
-    name = split[-1]
-    project_name = split[-2] if len(split) > 1 else None
-    namespace_name = split[-3] if len(split) > 2 else None
-
-    return namespace_name, project_name, name
-
-
 class DatasetDependencyType:
     DATASET = "dataset"
     STORAGE = "storage"
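
The URI helpers shrink to match: `create_dataset_uri` now takes only a name and an optional version, and `parse_dataset_name` (which split `namespace.project.name`) is removed. A round-trip sketch, assuming `DATASET_PREFIX` is `"ds://"` as the `ds://dogs@v3.0.1` example later in this file suggests:

```python
from datachain.dataset import create_dataset_uri, parse_dataset_uri

uri = create_dataset_uri("zalando", "3.0.1")
assert uri == "ds://zalando@v3.0.1"

# parse_dataset_uri (unchanged above) splits the URI back into name and version
name, version = parse_dataset_uri(uri)
assert (name, version) == ("zalando", "3.0.1")
```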
@@ -97,12 +78,8 @@ class DatasetDependencyType:
 
 @dataclass
 class DatasetDependency:
-    # TODO put `DatasetRecord` instead of name + version which will
-    # simplify codebase in various places
     id: int
     type: str
-    namespace: str
-    project: str
     name: str
     version: str
     created_at: datetime
@@ -123,8 +100,6 @@ class DatasetDependency:
     @classmethod
     def parse(
         cls: builtins.type[DD],
-        namespace_name: str,
-        project_name: str,
         id: int,
         dataset_id: Optional[int],
         dataset_version_id: Optional[int],
@@ -146,8 +121,6 @@ class DatasetDependency:
                 if is_listing_dataset(dataset_name)
                 else DatasetDependencyType.DATASET
             ),
-            namespace_name,
-            project_name,
             dataset_name,
             (
                 dataset_version  # type: ignore[arg-type]
@@ -362,7 +335,6 @@ class DatasetListVersion:
 class DatasetRecord:
     id: int
     name: str
-    project: Project
     description: Optional[str]
     attrs: list[str]
     schema: dict[str, Union[SQLType, type[SQLType]]]
@@ -377,9 +349,6 @@ class DatasetRecord:
     sources: str = ""
     query_script: str = ""
 
-    def __hash__(self):
-        return hash(f"{self.id}")
-
     @staticmethod
     def parse_schema(
         ct: dict[str, Any],
@@ -389,31 +358,10 @@ class DatasetRecord:
             for c_name, c_type in ct.items()
         }
 
-    @staticmethod
-    def validate_name(name: str) -> None:
-        """Throws exception if name has reserved characters"""
-        for c in DATASET_NAME_RESERVED_CHARS:
-            if c in name:
-                raise InvalidDatasetNameError(
-                    f"Character {c} is reserved and not allowed in dataset name"
-                )
-
     @classmethod
     def parse(  # noqa: PLR0913
         cls,
-        namespace_id: int,
-        namespace_uuid: str,
-        namespace_name: str,
-        namespace_description: Optional[str],
-        namespace_created_at: datetime,
-        project_id: int,
-        project_uuid: str,
-        project_name: str,
-        project_description: Optional[str],
-        project_created_at: datetime,
-        project_namespace_id: int,
-        dataset_id: int,
-        dataset_project_id: int,
+        id: int,
         name: str,
         description: Optional[str],
         attrs: str,
@@ -452,23 +400,6 @@ class DatasetRecord:
             json.loads(version_schema) if version_schema else {}
         )
 
-        namespace = Namespace(
-            namespace_id,
-            namespace_uuid,
-            namespace_name,
-            namespace_description,
-            namespace_created_at,
-        )
-
-        project = Project(
-            project_id,
-            project_uuid,
-            project_name,
-            project_description,
-            project_created_at,
-            namespace,
-        )
-
         dataset_version = DatasetVersion.parse(
             version_id,
             version_uuid,
@@ -491,9 +422,8 @@ class DatasetRecord:
         )
 
         return cls(
-            dataset_id,
+            id,
             name,
-            project,
             description,
             attrs_lst,
             cls.parse_schema(schema_dct),  # type: ignore[arg-type]
@@ -518,10 +448,6 @@ class DatasetRecord:
             for c_name, c_type in self.schema.items()
         }
 
-    @property
-    def full_name(self) -> str:
-        return f"{self.project.namespace.name}.{self.project.name}.{self.name}"
-
     def get_schema(self, version: str) -> dict[str, Union[SQLType, type[SQLType]]]:
         return self.get_version(version).schema if version else self.schema
 
@@ -601,10 +527,7 @@ class DatasetRecord:
         Dataset uri example: ds://dogs@v3.0.1
         """
         identifier = self.identifier(version)
-        return (
-            f"{DATASET_PREFIX}{self.project.namespace.name}"
-            f".{self.project.name}.{identifier}"
-        )
+        return f"{DATASET_PREFIX}{identifier}"
 
     @property
     def next_version_major(self) -> str:
@@ -669,17 +592,15 @@ class DatasetRecord:
 
     @classmethod
     def from_dict(cls, d: dict[str, Any]) -> "DatasetRecord":
-        project = Project.from_dict(d.pop("project"))
         versions = [DatasetVersion.from_dict(v) for v in d.pop("versions", [])]
         kwargs = {f.name: d[f.name] for f in fields(cls) if f.name in d}
-        return cls(**kwargs, versions=versions, project=project)
+        return cls(**kwargs, versions=versions)
 
 
 @dataclass
 class DatasetListRecord:
     id: int
     name: str
-    project: Project
     description: Optional[str]
     attrs: list[str]
     versions: list[DatasetListVersion]
@@ -688,18 +609,7 @@ class DatasetListRecord:
     @classmethod
     def parse(  # noqa: PLR0913
         cls,
-        namespace_id: int,
-        namespace_uuid: str,
-        namespace_name: str,
-        namespace_description: Optional[str],
-        namespace_created_at: datetime,
-        project_id: int,
-        project_uuid: str,
-        project_name: str,
-        project_description: Optional[str],
-        project_created_at: datetime,
-        project_namespace_id: int,
-        dataset_id: int,
+        id: int,
         name: str,
         description: Optional[str],
         attrs: str,
@@ -720,23 +630,6 @@ class DatasetListRecord:
     ) -> "DatasetListRecord":
         attrs_lst: list[str] = json.loads(attrs) if attrs else []
 
-        namespace = Namespace(
-            namespace_id,
-            namespace_uuid,
-            namespace_name,
-            namespace_description,
-            namespace_created_at,
-        )
-
-        project = Project(
-            project_id,
-            project_uuid,
-            project_name,
-            project_description,
-            project_created_at,
-            namespace,
-        )
-
         dataset_version = DatasetListVersion.parse(
             version_id,
             version_uuid,
@@ -754,19 +647,14 @@ class DatasetListRecord:
         )
 
         return cls(
-            dataset_id,
+            id,
             name,
-            project,
             description,
             attrs_lst,
             [dataset_version],
             created_at,
         )
 
-    @property
-    def full_name(self) -> str:
-        return f"{self.project.namespace.name}.{self.project.name}.{self.name}"
-
     def merge_versions(self, other: "DatasetListRecord") -> "DatasetListRecord":
         """Merge versions from another dataset"""
         if other.id != self.id:
@@ -803,11 +691,9 @@ class DatasetListRecord:
 
     @classmethod
     def from_dict(cls, d: dict[str, Any]) -> "DatasetListRecord":
-        project = Project.from_dict(d.pop("project"))
         versions = [DatasetListVersion.parse(**v) for v in d.get("versions", [])]
         kwargs = {f.name: d[f.name] for f in fields(cls) if f.name in d}
         kwargs["versions"] = versions
-        kwargs["project"] = project
         return cls(**kwargs)
datachain/delta.py CHANGED
@@ -56,13 +56,11 @@ def _get_delta_chain(
     compare: Optional[Union[str, Sequence[str]]] = None,
 ) -> "DataChain":
     """Get delta chain for processing changes between versions."""
-    source_dc = datachain.read_dataset(source_ds_name, version=source_ds_version)
-    source_dc_latest = datachain.read_dataset(
-        source_ds_name, version=source_ds_latest_version
-    )
+    source_dc = datachain.read_dataset(source_ds_name, source_ds_version)
+    source_dc_latest = datachain.read_dataset(source_ds_name, source_ds_latest_version)
 
     # Calculate diff between source versions
-    return source_dc_latest.diff(source_dc, on=on, compare=compare, deleted=False)
+    return source_dc_latest.compare(source_dc, on=on, compare=compare, deleted=False)
 
 
 def _get_retry_chain(
@@ -81,10 +79,8 @@ def _get_retry_chain(
     retry_chain = None
 
     # Read the latest version of the result dataset for retry logic
-    result_dataset = datachain.read_dataset(name, version=latest_version)
-    source_dc_latest = datachain.read_dataset(
-        source_ds_name, version=source_ds_latest_version
-    )
+    result_dataset = datachain.read_dataset(name, latest_version)
+    source_dc_latest = datachain.read_dataset(source_ds_name, source_ds_latest_version)
 
     # Handle error records if delta_retry is a string (column name)
     if isinstance(delta_retry, str):
@@ -236,8 +232,8 @@ def delta_retry_update(
     if processing_chain is None or (processing_chain and processing_chain.empty):
         return None, None, False
 
-    latest_dataset = datachain.read_dataset(name, version=latest_version)
-    compared_chain = latest_dataset.diff(
+    latest_dataset = datachain.read_dataset(name, latest_version)
+    compared_chain = latest_dataset.compare(
         processing_chain,
         on=right_on or on,
         added=True,
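
Two call-site conventions change throughout delta.py: `read_dataset` is now called with the version as a second positional argument, and version comparison goes through `compare` instead of `diff`. A sketch of the new shape (the dataset name, versions, and key column are placeholders):

```python
import datachain

# version is now passed positionally rather than as version=...
old = datachain.read_dataset("my_dataset", "1.0.0")
new = datachain.read_dataset("my_dataset", "1.1.0")

# compare() replaces diff() at these call sites; the flags keep their meaning
changed = new.compare(old, on="id", added=True, deleted=False)
```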
datachain/error.py CHANGED
@@ -2,42 +2,10 @@ class DataChainError(RuntimeError):
     pass
 
 
-class InvalidDatasetNameError(RuntimeError):
-    pass
-
-
-class InvalidNamespaceNameError(RuntimeError):
-    pass
-
-
-class InvalidProjectNameError(RuntimeError):
-    pass
-
-
 class NotFoundError(Exception):
     pass
 
 
-class NamespaceNotFoundError(NotFoundError):
-    pass
-
-
-class NotAllowedError(Exception):
-    pass
-
-
-class NamespaceCreateNotAllowedError(NotAllowedError):
-    pass
-
-
-class ProjectCreateNotAllowedError(NotAllowedError):
-    pass
-
-
-class ProjectNotFoundError(NotFoundError):
-    pass
-
-
 class DatasetNotFoundError(NotFoundError):
     pass
 
@@ -85,7 +53,3 @@ class ClientError(RuntimeError):
 
 class TableMissingError(DataChainError):
     pass
-
-
-class OutdatedDatabaseSchemaError(DataChainError):
-    pass