datachain 0.34.6__py3-none-any.whl → 0.34.7__py3-none-any.whl

This diff compares two package versions that were publicly released to one of the supported registries, exactly as they appear in those registries. It is provided for informational purposes only.

Potentially problematic release: this version of datachain might be problematic.
Files changed (105)
  1. datachain/asyn.py +11 -12
  2. datachain/cache.py +5 -5
  3. datachain/catalog/catalog.py +75 -83
  4. datachain/catalog/loader.py +3 -3
  5. datachain/checkpoint.py +1 -2
  6. datachain/cli/__init__.py +2 -4
  7. datachain/cli/commands/datasets.py +13 -13
  8. datachain/cli/commands/ls.py +4 -4
  9. datachain/cli/commands/query.py +3 -3
  10. datachain/cli/commands/show.py +2 -2
  11. datachain/cli/parser/job.py +1 -1
  12. datachain/cli/parser/utils.py +1 -2
  13. datachain/cli/utils.py +1 -2
  14. datachain/client/azure.py +2 -2
  15. datachain/client/fsspec.py +11 -21
  16. datachain/client/gcs.py +3 -3
  17. datachain/client/http.py +4 -4
  18. datachain/client/local.py +4 -4
  19. datachain/client/s3.py +3 -3
  20. datachain/config.py +4 -8
  21. datachain/data_storage/db_engine.py +5 -5
  22. datachain/data_storage/metastore.py +107 -107
  23. datachain/data_storage/schema.py +18 -24
  24. datachain/data_storage/sqlite.py +21 -28
  25. datachain/data_storage/warehouse.py +13 -13
  26. datachain/dataset.py +64 -70
  27. datachain/delta.py +21 -18
  28. datachain/diff/__init__.py +13 -13
  29. datachain/func/aggregate.py +9 -11
  30. datachain/func/array.py +12 -12
  31. datachain/func/base.py +7 -4
  32. datachain/func/conditional.py +9 -13
  33. datachain/func/func.py +45 -42
  34. datachain/func/numeric.py +5 -7
  35. datachain/func/string.py +2 -2
  36. datachain/hash_utils.py +54 -81
  37. datachain/job.py +8 -8
  38. datachain/lib/arrow.py +17 -14
  39. datachain/lib/audio.py +6 -6
  40. datachain/lib/clip.py +5 -4
  41. datachain/lib/convert/python_to_sql.py +4 -22
  42. datachain/lib/convert/values_to_tuples.py +4 -9
  43. datachain/lib/data_model.py +20 -19
  44. datachain/lib/dataset_info.py +6 -6
  45. datachain/lib/dc/csv.py +10 -10
  46. datachain/lib/dc/database.py +28 -29
  47. datachain/lib/dc/datachain.py +98 -97
  48. datachain/lib/dc/datasets.py +22 -22
  49. datachain/lib/dc/hf.py +4 -4
  50. datachain/lib/dc/json.py +9 -10
  51. datachain/lib/dc/listings.py +5 -8
  52. datachain/lib/dc/pandas.py +3 -6
  53. datachain/lib/dc/parquet.py +5 -5
  54. datachain/lib/dc/records.py +5 -5
  55. datachain/lib/dc/storage.py +12 -12
  56. datachain/lib/dc/storage_pattern.py +2 -2
  57. datachain/lib/dc/utils.py +11 -14
  58. datachain/lib/dc/values.py +3 -6
  59. datachain/lib/file.py +26 -26
  60. datachain/lib/hf.py +7 -5
  61. datachain/lib/image.py +13 -13
  62. datachain/lib/listing.py +5 -5
  63. datachain/lib/listing_info.py +1 -2
  64. datachain/lib/meta_formats.py +1 -2
  65. datachain/lib/model_store.py +3 -3
  66. datachain/lib/namespaces.py +4 -6
  67. datachain/lib/projects.py +5 -9
  68. datachain/lib/pytorch.py +10 -10
  69. datachain/lib/settings.py +23 -23
  70. datachain/lib/signal_schema.py +52 -44
  71. datachain/lib/text.py +8 -7
  72. datachain/lib/udf.py +25 -17
  73. datachain/lib/udf_signature.py +11 -11
  74. datachain/lib/video.py +3 -4
  75. datachain/lib/webdataset.py +30 -35
  76. datachain/lib/webdataset_laion.py +15 -16
  77. datachain/listing.py +4 -4
  78. datachain/model/bbox.py +3 -1
  79. datachain/namespace.py +4 -4
  80. datachain/node.py +6 -6
  81. datachain/nodes_thread_pool.py +0 -1
  82. datachain/plugins.py +1 -7
  83. datachain/project.py +4 -4
  84. datachain/query/batch.py +7 -8
  85. datachain/query/dataset.py +80 -87
  86. datachain/query/dispatch.py +7 -7
  87. datachain/query/metrics.py +3 -4
  88. datachain/query/params.py +2 -3
  89. datachain/query/schema.py +7 -6
  90. datachain/query/session.py +7 -7
  91. datachain/query/udf.py +8 -7
  92. datachain/query/utils.py +3 -5
  93. datachain/remote/studio.py +33 -39
  94. datachain/script_meta.py +12 -12
  95. datachain/sql/sqlite/base.py +6 -9
  96. datachain/studio.py +30 -30
  97. datachain/toolkit/split.py +1 -2
  98. datachain/utils.py +21 -21
  99. {datachain-0.34.6.dist-info → datachain-0.34.7.dist-info}/METADATA +2 -3
  100. datachain-0.34.7.dist-info/RECORD +173 -0
  101. datachain-0.34.6.dist-info/RECORD +0 -173
  102. {datachain-0.34.6.dist-info → datachain-0.34.7.dist-info}/WHEEL +0 -0
  103. {datachain-0.34.6.dist-info → datachain-0.34.7.dist-info}/entry_points.txt +0 -0
  104. {datachain-0.34.6.dist-info → datachain-0.34.7.dist-info}/licenses/LICENSE +0 -0
  105. {datachain-0.34.6.dist-info → datachain-0.34.7.dist-info}/top_level.txt +0 -0
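
The two file diffs reproduced below are representative of the whole release: nearly every hunk replaces a typing.Optional[X] annotation with the equivalent PEP 604 union X | None. As an editorial illustration (not code from the package), the sketch below shows that the two spellings are the same type at runtime; the bare X | None annotation syntax requires Python 3.10+ (or from __future__ import annotations on older versions).

# Illustrative sketch of the annotation migration applied across this release.
# Optional[str] and str | None denote the same type; only the spelling changes.
from typing import Optional, get_type_hints

def old_style(uri: Optional[str] = None) -> Optional[int]:
    return len(uri) if uri is not None else None

def new_style(uri: str | None = None) -> int | None:
    return len(uri) if uri is not None else None

# Both annotations resolve to the same runtime type objects (Python 3.10+).
assert get_type_hints(old_style) == get_type_hints(new_style)
assert old_style("s3://bucket") == new_style("s3://bucket") == 11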
datachain/data_storage/metastore.py  +107 -107

@@ -7,7 +7,7 @@ from collections.abc import Iterator
 from datetime import datetime, timezone
 from functools import cached_property, reduce
 from itertools import groupby
-from typing import TYPE_CHECKING, Any, Optional
+from typing import TYPE_CHECKING, Any
 from uuid import uuid4

 from sqlalchemy import (
@@ -83,7 +83,7 @@ class AbstractMetastore(ABC, Serializable):

     def __init__(
         self,
-        uri: Optional[StorageURI] = None,
+        uri: StorageURI | None = None,
     ):
         self.uri = uri or StorageURI("")

@@ -97,7 +97,7 @@ class AbstractMetastore(ABC, Serializable):
     @abstractmethod
     def clone(
         self,
-        uri: Optional[StorageURI] = None,
+        uri: StorageURI | None = None,
         use_new_connection: bool = False,
     ) -> "AbstractMetastore":
         """Clones AbstractMetastore implementation for some Storage input.
@@ -137,8 +137,8 @@ class AbstractMetastore(ABC, Serializable):
     def create_namespace(
         self,
         name: str,
-        description: Optional[str] = None,
-        uuid: Optional[str] = None,
+        description: str | None = None,
+        uuid: str | None = None,
         ignore_if_exists: bool = True,
         validate: bool = True,
         **kwargs,
@@ -185,8 +185,8 @@ class AbstractMetastore(ABC, Serializable):
         self,
         namespace_name: str,
         name: str,
-        description: Optional[str] = None,
-        uuid: Optional[str] = None,
+        description: str | None = None,
+        uuid: str | None = None,
         ignore_if_exists: bool = True,
         validate: bool = True,
         **kwargs,
@@ -219,7 +219,7 @@ class AbstractMetastore(ABC, Serializable):
         """Gets a single project by id"""

     @abstractmethod
-    def count_projects(self, namespace_id: Optional[int] = None) -> int:
+    def count_projects(self, namespace_id: int | None = None) -> int:
         """Counts projects in some namespace or in general."""

     @abstractmethod
@@ -227,7 +227,7 @@ class AbstractMetastore(ABC, Serializable):
         """Removes a single project by id"""

     @abstractmethod
-    def list_projects(self, namespace_id: Optional[int], conn=None) -> list[Project]:
+    def list_projects(self, namespace_id: int | None, conn=None) -> list[Project]:
         """Gets list of projects in some namespace or in general (in all namespaces)"""

     #
@@ -237,15 +237,15 @@ class AbstractMetastore(ABC, Serializable):
     def create_dataset(
         self,
         name: str,
-        project_id: Optional[int] = None,
+        project_id: int | None = None,
         status: int = DatasetStatus.CREATED,
-        sources: Optional[list[str]] = None,
-        feature_schema: Optional[dict] = None,
+        sources: list[str] | None = None,
+        feature_schema: dict | None = None,
         query_script: str = "",
-        schema: Optional[dict[str, Any]] = None,
+        schema: dict[str, Any] | None = None,
         ignore_if_exists: bool = False,
-        description: Optional[str] = None,
-        attrs: Optional[list[str]] = None,
+        description: str | None = None,
+        attrs: list[str] | None = None,
     ) -> DatasetRecord:
         """Creates new dataset."""
@@ -256,20 +256,20 @@ class AbstractMetastore(ABC, Serializable):
         version: str,
         status: int,
         sources: str = "",
-        feature_schema: Optional[dict] = None,
+        feature_schema: dict | None = None,
         query_script: str = "",
         error_message: str = "",
         error_stack: str = "",
         script_output: str = "",
-        created_at: Optional[datetime] = None,
-        finished_at: Optional[datetime] = None,
-        schema: Optional[dict[str, Any]] = None,
+        created_at: datetime | None = None,
+        finished_at: datetime | None = None,
+        schema: dict[str, Any] | None = None,
         ignore_if_exists: bool = False,
-        num_objects: Optional[int] = None,
-        size: Optional[int] = None,
-        preview: Optional[list[dict]] = None,
-        job_id: Optional[str] = None,
-        uuid: Optional[str] = None,
+        num_objects: int | None = None,
+        size: int | None = None,
+        preview: list[dict] | None = None,
+        job_id: str | None = None,
+        uuid: str | None = None,
     ) -> DatasetRecord:
         """Creates new dataset version."""

@@ -298,17 +298,17 @@ class AbstractMetastore(ABC, Serializable):

     @abstractmethod
     def list_datasets(
-        self, project_id: Optional[int] = None
+        self, project_id: int | None = None
     ) -> Iterator[DatasetListRecord]:
         """Lists all datasets in some project or in all projects."""

     @abstractmethod
-    def count_datasets(self, project_id: Optional[int] = None) -> int:
+    def count_datasets(self, project_id: int | None = None) -> int:
         """Counts datasets in some project or in all projects."""

     @abstractmethod
     def list_datasets_by_prefix(
-        self, prefix: str, project_id: Optional[int] = None
+        self, prefix: str, project_id: int | None = None
     ) -> Iterator["DatasetListRecord"]:
         """
         Lists all datasets which names start with prefix in some project or in all
@@ -319,8 +319,8 @@ class AbstractMetastore(ABC, Serializable):
     def get_dataset(
         self,
         name: str,  # normal, not full dataset name
-        namespace_name: Optional[str] = None,
-        project_name: Optional[str] = None,
+        namespace_name: str | None = None,
+        project_name: str | None = None,
         conn=None,
     ) -> DatasetRecord:
         """Gets a single dataset by name."""
@@ -330,7 +330,7 @@ class AbstractMetastore(ABC, Serializable):
         self,
         dataset: DatasetRecord,
         status: int,
-        version: Optional[str] = None,
+        version: str | None = None,
         error_message="",
         error_stack="",
         script_output="",
@@ -355,20 +355,20 @@ class AbstractMetastore(ABC, Serializable):
         self,
         source_dataset: DatasetRecord,
         source_dataset_version: str,
-        new_source_dataset: Optional[DatasetRecord] = None,
-        new_source_dataset_version: Optional[str] = None,
+        new_source_dataset: DatasetRecord | None = None,
+        new_source_dataset_version: str | None = None,
     ) -> None:
         """Updates dataset dependency source."""

     @abstractmethod
     def get_direct_dataset_dependencies(
         self, dataset: DatasetRecord, version: str
-    ) -> list[Optional[DatasetDependency]]:
+    ) -> list[DatasetDependency | None]:
         """Gets direct dataset dependencies."""

     @abstractmethod
     def remove_dataset_dependencies(
-        self, dataset: DatasetRecord, version: Optional[str] = None
+        self, dataset: DatasetRecord, version: str | None = None
     ) -> None:
         """
         When we remove dataset, we need to clean up it's dependencies as well.
@@ -376,7 +376,7 @@ class AbstractMetastore(ABC, Serializable):

     @abstractmethod
     def remove_dataset_dependants(
-        self, dataset: DatasetRecord, version: Optional[str] = None
+        self, dataset: DatasetRecord, version: str | None = None
     ) -> None:
         """
         When we remove dataset, we need to clear its references in other dataset
@@ -398,9 +398,9 @@ class AbstractMetastore(ABC, Serializable):
         query_type: JobQueryType = JobQueryType.PYTHON,
         status: JobStatus = JobStatus.CREATED,
         workers: int = 1,
-        python_version: Optional[str] = None,
-        params: Optional[dict[str, str]] = None,
-        parent_job_id: Optional[str] = None,
+        python_version: str | None = None,
+        params: dict[str, str] | None = None,
+        parent_job_id: str | None = None,
     ) -> str:
         """
         Creates a new job.
@@ -408,19 +408,19 @@ class AbstractMetastore(ABC, Serializable):
         """

     @abstractmethod
-    def get_job(self, job_id: str) -> Optional[Job]:
+    def get_job(self, job_id: str) -> Job | None:
         """Returns the job with the given ID."""

     @abstractmethod
     def update_job(
         self,
         job_id: str,
-        status: Optional[JobStatus] = None,
-        error_message: Optional[str] = None,
-        error_stack: Optional[str] = None,
-        finished_at: Optional[datetime] = None,
-        metrics: Optional[dict[str, Any]] = None,
-    ) -> Optional["Job"]:
+        status: JobStatus | None = None,
+        error_message: str | None = None,
+        error_stack: str | None = None,
+        finished_at: datetime | None = None,
+        metrics: dict[str, Any] | None = None,
+    ) -> Job | None:
         """Updates job fields."""

     @abstractmethod
@@ -428,13 +428,13 @@ class AbstractMetastore(ABC, Serializable):
         self,
         job_id: str,
         status: JobStatus,
-        error_message: Optional[str] = None,
-        error_stack: Optional[str] = None,
+        error_message: str | None = None,
+        error_stack: str | None = None,
     ) -> None:
         """Set the status of the given job."""

     @abstractmethod
-    def get_job_status(self, job_id: str) -> Optional[JobStatus]:
+    def get_job_status(self, job_id: str) -> JobStatus | None:
         """Returns the status of the given job."""

     #
@@ -442,11 +442,11 @@ class AbstractMetastore(ABC, Serializable):
     #

     @abstractmethod
-    def list_checkpoints(self, job_id: str, conn=None) -> Iterator["Checkpoint"]:
+    def list_checkpoints(self, job_id: str, conn=None) -> Iterator[Checkpoint]:
         """Returns all checkpoints related to some job"""

     @abstractmethod
-    def get_last_checkpoint(self, job_id: str, conn=None) -> Optional[Checkpoint]:
+    def get_last_checkpoint(self, job_id: str, conn=None) -> Checkpoint | None:
         """Get last created checkpoint for some job."""

     @abstractmethod
@@ -455,7 +455,7 @@ class AbstractMetastore(ABC, Serializable):

     def find_checkpoint(
         self, job_id: str, _hash: str, partial: bool = False, conn=None
-    ) -> Optional[Checkpoint]:
+    ) -> Checkpoint | None:
         """
         Tries to find checkpoint for a job with specific hash and optionally partial
         """
@@ -466,7 +466,7 @@ class AbstractMetastore(ABC, Serializable):
         job_id: str,
         _hash: str,
         partial: bool = False,
-        conn: Optional[Any] = None,
+        conn: Any | None = None,
     ) -> Checkpoint:
         """Creates new checkpoint"""

@@ -489,7 +489,7 @@ class AbstractDBMetastore(AbstractMetastore):

     db: "DatabaseEngine"

-    def __init__(self, uri: Optional[StorageURI] = None):
+    def __init__(self, uri: StorageURI | None = None):
         uri = uri or StorageURI("")
         super().__init__(uri)

@@ -781,8 +781,8 @@ class AbstractDBMetastore(AbstractMetastore):
     def create_namespace(
         self,
         name: str,
-        description: Optional[str] = None,
-        uuid: Optional[str] = None,
+        description: str | None = None,
+        uuid: str | None = None,
         ignore_if_exists: bool = True,
         validate: bool = True,
         **kwargs,
@@ -846,8 +846,8 @@ class AbstractDBMetastore(AbstractMetastore):
         self,
         namespace_name: str,
         name: str,
-        description: Optional[str] = None,
-        uuid: Optional[str] = None,
+        description: str | None = None,
+        uuid: str | None = None,
         ignore_if_exists: bool = True,
         validate: bool = True,
         **kwargs,
@@ -925,7 +925,7 @@ class AbstractDBMetastore(AbstractMetastore):
             raise ProjectNotFoundError(f"Project with id {project_id} not found.")
         return self.project_class.parse(*rows[0])

-    def count_projects(self, namespace_id: Optional[int] = None) -> int:
+    def count_projects(self, namespace_id: int | None = None) -> int:
         p = self._projects

         query = self._projects_base_query()
@@ -949,7 +949,7 @@ class AbstractDBMetastore(AbstractMetastore):
         self.db.execute(self._projects_delete().where(p.c.id == project_id))

     def list_projects(
-        self, namespace_id: Optional[int] = None, conn=None
+        self, namespace_id: int | None = None, conn=None
     ) -> list[Project]:
         """
         Gets a list of projects inside some namespace, or in all namespaces
@@ -972,15 +972,15 @@ class AbstractDBMetastore(AbstractMetastore):
     def create_dataset(
         self,
         name: str,
-        project_id: Optional[int] = None,
+        project_id: int | None = None,
         status: int = DatasetStatus.CREATED,
-        sources: Optional[list[str]] = None,
-        feature_schema: Optional[dict] = None,
+        sources: list[str] | None = None,
+        feature_schema: dict | None = None,
         query_script: str = "",
-        schema: Optional[dict[str, Any]] = None,
+        schema: dict[str, Any] | None = None,
         ignore_if_exists: bool = False,
-        description: Optional[str] = None,
-        attrs: Optional[list[str]] = None,
+        description: str | None = None,
+        attrs: list[str] | None = None,
         **kwargs,  # TODO registered = True / False
     ) -> DatasetRecord:
         """Creates new dataset."""
@@ -1020,20 +1020,20 @@ class AbstractDBMetastore(AbstractMetastore):
         version: str,
         status: int,
         sources: str = "",
-        feature_schema: Optional[dict] = None,
+        feature_schema: dict | None = None,
         query_script: str = "",
         error_message: str = "",
         error_stack: str = "",
         script_output: str = "",
-        created_at: Optional[datetime] = None,
-        finished_at: Optional[datetime] = None,
-        schema: Optional[dict[str, Any]] = None,
+        created_at: datetime | None = None,
+        finished_at: datetime | None = None,
+        schema: dict[str, Any] | None = None,
         ignore_if_exists: bool = False,
-        num_objects: Optional[int] = None,
-        size: Optional[int] = None,
-        preview: Optional[list[dict]] = None,
-        job_id: Optional[str] = None,
-        uuid: Optional[str] = None,
+        num_objects: int | None = None,
+        size: int | None = None,
+        preview: list[dict] | None = None,
+        job_id: str | None = None,
+        uuid: str | None = None,
         conn=None,
     ) -> DatasetRecord:
         """Creates new dataset version."""
@@ -1205,13 +1205,13 @@ class AbstractDBMetastore(AbstractMetastore):
                 f"Dataset {dataset.name} does not have version {version}"
             )

-    def _parse_dataset(self, rows) -> Optional[DatasetRecord]:
+    def _parse_dataset(self, rows) -> DatasetRecord | None:
         versions = [self.dataset_class.parse(*r) for r in rows]
         if not versions:
             return None
         return reduce(lambda ds, version: ds.merge_versions(version), versions)

-    def _parse_list_dataset(self, rows) -> Optional[DatasetListRecord]:
+    def _parse_list_dataset(self, rows) -> DatasetListRecord | None:
         versions = [self.dataset_list_class.parse(*r) for r in rows]
         if not versions:
             return None
@@ -1274,7 +1274,7 @@ class AbstractDBMetastore(AbstractMetastore):
         )

     def list_datasets(
-        self, project_id: Optional[int] = None
+        self, project_id: int | None = None
     ) -> Iterator["DatasetListRecord"]:
         d = self._datasets
         query = self._base_list_datasets_query().order_by(
@@ -1284,7 +1284,7 @@ class AbstractDBMetastore(AbstractMetastore):
         query = query.where(d.c.project_id == project_id)
         yield from self._parse_dataset_list(self.db.execute(query))

-    def count_datasets(self, project_id: Optional[int] = None) -> int:
+    def count_datasets(self, project_id: int | None = None) -> int:
         d = self._datasets
         query = self._datasets_select()
         if project_id:
@@ -1295,7 +1295,7 @@ class AbstractDBMetastore(AbstractMetastore):
         return next(self.db.execute(query))[0]

     def list_datasets_by_prefix(
-        self, prefix: str, project_id: Optional[int] = None, conn=None
+        self, prefix: str, project_id: int | None = None, conn=None
     ) -> Iterator["DatasetListRecord"]:
         d = self._datasets
         query = self._base_list_datasets_query()
@@ -1307,8 +1307,8 @@ class AbstractDBMetastore(AbstractMetastore):
     def get_dataset(
         self,
         name: str,  # normal, not full dataset name
-        namespace_name: Optional[str] = None,
-        project_name: Optional[str] = None,
+        namespace_name: str | None = None,
+        project_name: str | None = None,
         conn=None,
     ) -> DatasetRecord:
         """
@@ -1369,7 +1369,7 @@ class AbstractDBMetastore(AbstractMetastore):
         self,
         dataset: DatasetRecord,
         status: int,
-        version: Optional[str] = None,
+        version: str | None = None,
         error_message="",
         error_stack="",
         script_output="",
@@ -1423,8 +1423,8 @@ class AbstractDBMetastore(AbstractMetastore):
         self,
         source_dataset: DatasetRecord,
         source_dataset_version: str,
-        new_source_dataset: Optional[DatasetRecord] = None,
-        new_source_dataset_version: Optional[str] = None,
+        new_source_dataset: DatasetRecord | None = None,
+        new_source_dataset_version: str | None = None,
     ) -> None:
         dd = self._datasets_dependencies

@@ -1456,7 +1456,7 @@ class AbstractDBMetastore(AbstractMetastore):

     def get_direct_dataset_dependencies(
         self, dataset: DatasetRecord, version: str
-    ) -> list[Optional[DatasetDependency]]:
+    ) -> list[DatasetDependency | None]:
         n = self._namespaces
         p = self._projects
         d = self._datasets
@@ -1484,7 +1484,7 @@ class AbstractDBMetastore(AbstractMetastore):
         return [self.dependency_class.parse(*r) for r in self.db.execute(query)]

     def remove_dataset_dependencies(
-        self, dataset: DatasetRecord, version: Optional[str] = None
+        self, dataset: DatasetRecord, version: str | None = None
     ) -> None:
         """
         When we remove dataset, we need to clean up it's dependencies as well
@@ -1503,7 +1503,7 @@ class AbstractDBMetastore(AbstractMetastore):
         self.db.execute(q)

     def remove_dataset_dependants(
-        self, dataset: DatasetRecord, version: Optional[str] = None
+        self, dataset: DatasetRecord, version: str | None = None
     ) -> None:
         """
         When we remove dataset, we need to clear its references in other dataset
@@ -1600,10 +1600,10 @@ class AbstractDBMetastore(AbstractMetastore):
         query_type: JobQueryType = JobQueryType.PYTHON,
         status: JobStatus = JobStatus.CREATED,
         workers: int = 1,
-        python_version: Optional[str] = None,
-        params: Optional[dict[str, str]] = None,
-        parent_job_id: Optional[str] = None,
-        conn: Optional[Any] = None,
+        python_version: str | None = None,
+        params: dict[str, str] | None = None,
+        parent_job_id: str | None = None,
+        conn: Any = None,
     ) -> str:
         """
         Creates a new job.
@@ -1630,7 +1630,7 @@ class AbstractDBMetastore(AbstractMetastore):
         )
         return job_id

-    def get_job(self, job_id: str, conn=None) -> Optional[Job]:
+    def get_job(self, job_id: str, conn=None) -> Job | None:
         """Returns the job with the given ID."""
         query = self._jobs_select(self._jobs).where(self._jobs.c.id == job_id)
         results = list(self.db.execute(query, conn=conn))
@@ -1641,13 +1641,13 @@ class AbstractDBMetastore(AbstractMetastore):
     def update_job(
         self,
         job_id: str,
-        status: Optional[JobStatus] = None,
-        error_message: Optional[str] = None,
-        error_stack: Optional[str] = None,
-        finished_at: Optional[datetime] = None,
-        metrics: Optional[dict[str, Any]] = None,
-        conn: Optional[Any] = None,
-    ) -> Optional["Job"]:
+        status: JobStatus | None = None,
+        error_message: str | None = None,
+        error_stack: str | None = None,
+        finished_at: datetime | None = None,
+        metrics: dict[str, Any] | None = None,
+        conn: Any | None = None,
+    ) -> Job | None:
         """Updates job fields."""
         values: dict = {}
         if status is not None:
@@ -1674,9 +1674,9 @@ class AbstractDBMetastore(AbstractMetastore):
         self,
         job_id: str,
         status: JobStatus,
-        error_message: Optional[str] = None,
-        error_stack: Optional[str] = None,
-        conn: Optional[Any] = None,
+        error_message: str | None = None,
+        error_stack: str | None = None,
+        conn: Any | None = None,
     ) -> None:
         """Set the status of the given job."""
         values: dict = {"status": status}
@@ -1694,8 +1694,8 @@ class AbstractDBMetastore(AbstractMetastore):
     def get_job_status(
         self,
         job_id: str,
-        conn: Optional[Any] = None,
-    ) -> Optional[JobStatus]:
+        conn: Any | None = None,
+    ) -> JobStatus | None:
         """Returns the status of the given job."""
         results = list(
             self.db.execute(
@@ -1761,7 +1761,7 @@ class AbstractDBMetastore(AbstractMetastore):
         job_id: str,
         _hash: str,
         partial: bool = False,
-        conn: Optional[Any] = None,
+        conn: Any | None = None,
     ) -> Checkpoint:
         """
         Creates a new job query step.
@@ -1797,7 +1797,7 @@ class AbstractDBMetastore(AbstractMetastore):

     def find_checkpoint(
         self, job_id: str, _hash: str, partial: bool = False, conn=None
-    ) -> Optional[Checkpoint]:
+    ) -> Checkpoint | None:
         """
         Tries to find checkpoint for a job with specific hash and optionally partial
         """
@@ -1810,7 +1810,7 @@ class AbstractDBMetastore(AbstractMetastore):
             return None
         return self.checkpoint_class.parse(*rows[0])

-    def get_last_checkpoint(self, job_id: str, conn=None) -> Optional[Checkpoint]:
+    def get_last_checkpoint(self, job_id: str, conn=None) -> Checkpoint | None:
         query = (
             self._checkpoints_query()
             .where(self._checkpoints.c.job_id == job_id)
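
One small inconsistency worth noting in the hunks above: most conn parameters became Any | None, while create_job's became plain Any. For Any specifically the difference is cosmetic, since Any already admits None. A minimal sketch (illustrative only, not package code):

# Sketch: `Any | None` adds nothing over `Any` -- Any is the dynamic type and
# already includes None -- so the mixed spellings in the diff are equivalent.
from typing import Any

def run_a(conn: Any = None) -> None:
    pass

def run_b(conn: Any | None = None) -> None:
    pass

run_a(None)  # accepted
run_b(None)  # accepted; identical from a type checker's point of view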
datachain/data_storage/schema.py  +18 -24

@@ -1,12 +1,6 @@
 import inspect
 from collections.abc import Iterable, Iterator, Sequence
-from typing import (
-    TYPE_CHECKING,
-    Any,
-    Generic,
-    Optional,
-    TypeVar,
-)
+from typing import TYPE_CHECKING, Any, Generic, TypeVar

 import sqlalchemy as sa
 from sqlalchemy.sql import func as f
@@ -96,11 +90,11 @@ class DirExpansion:
     def __init__(self, column: str):
         self.column = column

-    def col_name(self, name: str, column: Optional[str] = None) -> str:
+    def col_name(self, name: str, column: str | None = None) -> str:
         column = column or self.column
         return col_name(name, column)

-    def c(self, query, name: str, column: Optional[str] = None) -> str:
+    def c(self, query, name: str, column: str | None = None) -> str:
         return getattr(query.c, self.col_name(name, column=column))

     def base_select(self, q):
@@ -161,7 +155,7 @@ class DataTable:
         self,
         name: str,
         engine: "DatabaseEngine",
-        column_types: Optional[dict[str, SQLType]] = None,
+        column_types: dict[str, SQLType] | None = None,
         column: str = "file",
     ):
         self.name: str = name
@@ -172,12 +166,12 @@ class DataTable:
     @staticmethod
     def copy_column(
         column: sa.Column,
-        primary_key: Optional[bool] = None,
-        index: Optional[bool] = None,
-        nullable: Optional[bool] = None,
-        default: Optional[Any] = None,
-        server_default: Optional[Any] = None,
-        unique: Optional[bool] = None,
+        primary_key: bool | None = None,
+        index: bool | None = None,
+        nullable: bool | None = None,
+        default: Any | None = None,
+        server_default: Any | None = None,
+        unique: bool | None = None,
     ) -> sa.Column:
         """
         Copy a sqlalchemy Column object intended for use as a signal column.
@@ -206,8 +200,8 @@ class DataTable:
     def new_table(
         cls,
         name: str,
-        columns: Sequence["sa.Column"] = (),
-        metadata: Optional["sa.MetaData"] = None,
+        columns: Sequence[sa.Column] = (),
+        metadata: sa.MetaData | None = None,
     ):
         # copy columns, since reusing the same objects from another table
         # may raise an error
@@ -218,7 +212,7 @@ class DataTable:
             metadata = sa.MetaData()
         return sa.Table(name, metadata, *columns)

-    def get_table(self) -> "sa.Table":
+    def get_table(self) -> sa.Table:
         table = self.engine.get_table(self.name)

         column_types = self.column_types | {c.name: c.type for c in self.sys_columns()}
@@ -233,19 +227,19 @@ class DataTable:
     def columns(self) -> "ReadOnlyColumnCollection[str, sa.Column[Any]]":
         return self.table.columns

-    def col_name(self, name: str, column: Optional[str] = None) -> str:
+    def col_name(self, name: str, column: str | None = None) -> str:
         column = column or self.column
         return col_name(name, column)

-    def without_object(self, column_name: str, column: Optional[str] = None) -> str:
+    def without_object(self, column_name: str, column: str | None = None) -> str:
         column = column or self.column
         return column_name.removeprefix(f"{column}{DEFAULT_DELIMITER}")

-    def c(self, name: str, column: Optional[str] = None):
+    def c(self, name: str, column: str | None = None):
         return getattr(self.columns, self.col_name(name, column=column))

     @property
-    def table(self) -> "sa.Table":
+    def table(self) -> sa.Table:
         return self.get_table()

     def apply_conditions(self, query: "Executable") -> "Executable":
@@ -303,7 +297,7 @@ PARTITION_COLUMN_ID = "partition_id"
 partition_col_names = [PARTITION_COLUMN_ID]


-def partition_columns() -> Sequence["sa.Column"]:
+def partition_columns() -> Sequence[sa.Column]:
     return [
         sa.Column(PARTITION_COLUMN_ID, sa.Integer),
     ]
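
The schema.py diff also drops the quotes from several annotations ("sa.Table" becomes sa.Table, Sequence["sa.Column"] becomes Sequence[sa.Column]). A quoted annotation is a forward reference, needed only when the name is not resolvable at function-definition time; since sqlalchemy is imported as sa at the top of the module, both forms work there. An illustrative sketch (assumed, not package code):

# Sketch: quoted vs. unquoted annotations. With `sa` imported at module load,
# the quotes are redundant; they would only be required if the name were
# defined later, or imported only under TYPE_CHECKING.
import sqlalchemy as sa

def make_table_quoted(metadata: sa.MetaData, name: str) -> "sa.Table":
    # forward reference: stored as the string "sa.Table", resolved lazily
    return sa.Table(name, metadata)

def make_table_unquoted(metadata: sa.MetaData, name: str) -> sa.Table:
    # direct reference: evaluated immediately at definition time
    return sa.Table(name, metadata)

md = sa.MetaData()
assert make_table_quoted(md, "t1").name == "t1"
assert make_table_unquoted(md, "t2").name == "t2"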