datachain 0.30.5__py3-none-any.whl → 0.39.0__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
- datachain/__init__.py +4 -0
- datachain/asyn.py +11 -12
- datachain/cache.py +5 -5
- datachain/catalog/__init__.py +0 -2
- datachain/catalog/catalog.py +276 -354
- datachain/catalog/dependency.py +164 -0
- datachain/catalog/loader.py +8 -3
- datachain/checkpoint.py +43 -0
- datachain/cli/__init__.py +10 -17
- datachain/cli/commands/__init__.py +1 -8
- datachain/cli/commands/datasets.py +42 -27
- datachain/cli/commands/ls.py +15 -15
- datachain/cli/commands/show.py +2 -2
- datachain/cli/parser/__init__.py +3 -43
- datachain/cli/parser/job.py +1 -1
- datachain/cli/parser/utils.py +1 -2
- datachain/cli/utils.py +2 -15
- datachain/client/azure.py +2 -2
- datachain/client/fsspec.py +34 -23
- datachain/client/gcs.py +3 -3
- datachain/client/http.py +157 -0
- datachain/client/local.py +11 -7
- datachain/client/s3.py +3 -3
- datachain/config.py +4 -8
- datachain/data_storage/db_engine.py +12 -6
- datachain/data_storage/job.py +2 -0
- datachain/data_storage/metastore.py +716 -137
- datachain/data_storage/schema.py +20 -27
- datachain/data_storage/serializer.py +105 -15
- datachain/data_storage/sqlite.py +114 -114
- datachain/data_storage/warehouse.py +140 -48
- datachain/dataset.py +109 -89
- datachain/delta.py +117 -42
- datachain/diff/__init__.py +25 -33
- datachain/error.py +24 -0
- datachain/func/aggregate.py +9 -11
- datachain/func/array.py +12 -12
- datachain/func/base.py +7 -4
- datachain/func/conditional.py +9 -13
- datachain/func/func.py +63 -45
- datachain/func/numeric.py +5 -7
- datachain/func/string.py +2 -2
- datachain/hash_utils.py +123 -0
- datachain/job.py +11 -7
- datachain/json.py +138 -0
- datachain/lib/arrow.py +18 -15
- datachain/lib/audio.py +60 -59
- datachain/lib/clip.py +14 -13
- datachain/lib/convert/python_to_sql.py +6 -10
- datachain/lib/convert/values_to_tuples.py +151 -53
- datachain/lib/data_model.py +23 -19
- datachain/lib/dataset_info.py +7 -7
- datachain/lib/dc/__init__.py +2 -1
- datachain/lib/dc/csv.py +22 -26
- datachain/lib/dc/database.py +37 -34
- datachain/lib/dc/datachain.py +518 -324
- datachain/lib/dc/datasets.py +38 -30
- datachain/lib/dc/hf.py +16 -20
- datachain/lib/dc/json.py +17 -18
- datachain/lib/dc/listings.py +5 -8
- datachain/lib/dc/pandas.py +3 -6
- datachain/lib/dc/parquet.py +33 -21
- datachain/lib/dc/records.py +9 -13
- datachain/lib/dc/storage.py +103 -65
- datachain/lib/dc/storage_pattern.py +251 -0
- datachain/lib/dc/utils.py +17 -14
- datachain/lib/dc/values.py +3 -6
- datachain/lib/file.py +187 -50
- datachain/lib/hf.py +7 -5
- datachain/lib/image.py +13 -13
- datachain/lib/listing.py +5 -5
- datachain/lib/listing_info.py +1 -2
- datachain/lib/meta_formats.py +2 -3
- datachain/lib/model_store.py +20 -8
- datachain/lib/namespaces.py +59 -7
- datachain/lib/projects.py +51 -9
- datachain/lib/pytorch.py +31 -23
- datachain/lib/settings.py +188 -85
- datachain/lib/signal_schema.py +302 -64
- datachain/lib/text.py +8 -7
- datachain/lib/udf.py +103 -63
- datachain/lib/udf_signature.py +59 -34
- datachain/lib/utils.py +20 -0
- datachain/lib/video.py +3 -4
- datachain/lib/webdataset.py +31 -36
- datachain/lib/webdataset_laion.py +15 -16
- datachain/listing.py +12 -5
- datachain/model/bbox.py +3 -1
- datachain/namespace.py +22 -3
- datachain/node.py +6 -6
- datachain/nodes_thread_pool.py +0 -1
- datachain/plugins.py +24 -0
- datachain/project.py +4 -4
- datachain/query/batch.py +10 -12
- datachain/query/dataset.py +376 -194
- datachain/query/dispatch.py +112 -84
- datachain/query/metrics.py +3 -4
- datachain/query/params.py +2 -3
- datachain/query/queue.py +2 -1
- datachain/query/schema.py +7 -6
- datachain/query/session.py +190 -33
- datachain/query/udf.py +9 -6
- datachain/remote/studio.py +90 -53
- datachain/script_meta.py +12 -12
- datachain/sql/sqlite/base.py +37 -25
- datachain/sql/sqlite/types.py +1 -1
- datachain/sql/types.py +36 -5
- datachain/studio.py +49 -40
- datachain/toolkit/split.py +31 -10
- datachain/utils.py +39 -48
- {datachain-0.30.5.dist-info → datachain-0.39.0.dist-info}/METADATA +26 -38
- datachain-0.39.0.dist-info/RECORD +173 -0
- datachain/cli/commands/query.py +0 -54
- datachain/query/utils.py +0 -36
- datachain-0.30.5.dist-info/RECORD +0 -168
- {datachain-0.30.5.dist-info → datachain-0.39.0.dist-info}/WHEEL +0 -0
- {datachain-0.30.5.dist-info → datachain-0.39.0.dist-info}/entry_points.txt +0 -0
- {datachain-0.30.5.dist-info → datachain-0.39.0.dist-info}/licenses/LICENSE +0 -0
- {datachain-0.30.5.dist-info → datachain-0.39.0.dist-info}/top_level.txt +0 -0
datachain/dataset.py
CHANGED
@@ -1,21 +1,14 @@
 import builtins
-import json
 from dataclasses import dataclass, fields
 from datetime import datetime
 from functools import cached_property
-from typing import (
-    Any,
-    NewType,
-    Optional,
-    TypeVar,
-    Union,
-)
+from typing import Any, NewType, TypeVar
 from urllib.parse import urlparse
 
 from packaging.specifiers import SpecifierSet
 from packaging.version import Version
 
-from datachain import semver
+from datachain import json, semver
 from datachain.error import DatasetVersionNotFoundError, InvalidDatasetNameError
 from datachain.namespace import Namespace
 from datachain.project import Project
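The import changes above set the pattern for the rest of the file: the stdlib json import is replaced by datachain's own json module (new in this release, see datachain/json.py in the file list), and Optional/Union annotations give way to PEP 604 unions. A minimal sketch of the typing equivalence, for reference only:

    from typing import Optional, Union

    # Interchangeable at type-check time; 0.39.0 standardizes on the
    # PEP 604 spelling throughout this module (requires Python 3.10+
    # when the annotations are evaluated at runtime).
    finished_old: Optional[str] = None      # 0.30.5 style
    schema_old: Union[int, str] = 0
    finished_new: str | None = None         # 0.39.0 style
    schema_new: int | str = 0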
@@ -43,7 +36,7 @@ DATASET_NAME_REPLACEMENT_CHAR = "_"
 StorageURI = NewType("StorageURI", str)
 
 
-def parse_dataset_uri(uri: str) -> tuple[str, Optional[str]]:
+def parse_dataset_uri(uri: str) -> tuple[str, str | None]:
     """
     Parse dataser uri to extract name and version out of it (if version is defined)
     Example:
@@ -65,7 +58,7 @@ def parse_dataset_uri(uri: str) -> tuple[str, Optional[str]]:
 
 
 def create_dataset_uri(
-    name: str, namespace: str, project: str, version: Optional[str] = None
+    name: str, namespace: str, project: str, version: str | None = None
 ) -> str:
     """
     Creates a dataset uri based on namespace, project, dataset name and optionally
@@ -81,7 +74,7 @@ def create_dataset_uri(
     return uri
 
 
-def parse_dataset_name(name: str) -> tuple[Optional[str], Optional[str], str]:
+def parse_dataset_name(name: str) -> tuple[str | None, str | None, str]:
     """Parses dataset name and returns namespace, project and name"""
     if not name:
         raise InvalidDatasetNameError("Name must be defined to parse it")
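Together these helpers now advertise optional results as str | None. A hedged round-trip sketch built only from the signatures above; the exact URI format is not shown in the diff:

    uri = create_dataset_uri("images", "dev", "analytics", version="1.2.3")
    name, version = parse_dataset_uri(uri)               # version: str | None
    namespace, project, name = parse_dataset_name(name)  # namespace/project may be None
    if version is None:
        print(f"{name}: no version pinned")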
@@ -95,6 +88,40 @@ def parse_dataset_name(name: str) -> tuple[Optional[str], Optional[str], str]:
     return namespace_name, project_name, name
 
 
+def parse_schema(ct: dict[str, Any]) -> dict[str, SQLType | type[SQLType]]:
+    """Parse dataset schema from dictionary representation.
+
+    Args:
+        ct: Dictionary with column definitions
+
+    Returns:
+        Dictionary mapping column names to SQL types
+
+    Raises:
+        TypeError: If schema format is invalid
+        ValueError: If column type is not defined or not supported
+    """
+    if not isinstance(ct, dict):
+        raise TypeError("Schema definition must be a dictionary")
+    res = {}
+    for c_name, c_type in ct.items():
+        if not isinstance(c_type, dict):
+            raise TypeError(f"Schema column '{c_name}' type must be a dictionary")
+        if "type" not in c_type:
+            raise ValueError(f"Schema column '{c_name}' type is not defined")
+        if c_type["type"] not in NAME_TYPES_MAPPING:
+            raise ValueError(
+                f"Schema column '{c_name}' type '{c_type['type']}' is not supported"
+            )
+        try:
+            res[c_name] = NAME_TYPES_MAPPING[c_type["type"]].from_dict(c_type)  # type: ignore [attr-defined]
+        except Exception as e:
+            raise ValueError(
+                f"Schema column '{c_name}' type '{c_type['type']}' parsing error: {e}"
+            ) from e
+    return res
+
+
 class DatasetDependencyType:
     DATASET = "dataset"
     STORAGE = "storage"
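A short usage sketch for the new module-level parse_schema; the type names "Int64" and "String" are assumed here to be keys of NAME_TYPES_MAPPING:

    from datachain.dataset import parse_schema

    schema = parse_schema({"size": {"type": "Int64"}, "path": {"type": "String"}})

    # Malformed definitions now fail fast with descriptive errors rather
    # than a bare KeyError:
    try:
        parse_schema({"size": {"type": "NotAType"}})
    except ValueError as exc:
        print(exc)  # Schema column 'size' type 'NotAType' is not supported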
@@ -111,7 +138,7 @@ class DatasetDependency:
     name: str
     version: str
     created_at: datetime
-    dependencies: list[Optional["DatasetDependency"]]
+    dependencies: list["DatasetDependency | None"]
 
     @property
     def dataset_name(self) -> str:
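The quoting moves because PEP 604 unions are evaluated at runtime and a bare string forward reference cannot sit on one side of `|`. A standalone sketch of the constraint (stand-in class, illustration only):

    from typing import Optional

    class DatasetDependency:  # stand-in for the real dataclass
        # "DatasetDependency" | None would raise TypeError while the class
        # body is evaluated, so the whole union is quoted instead of being
        # wrapped in Optional[...]:
        deps_old: list[Optional["DatasetDependency"]]  # 0.30.5
        deps_new: list["DatasetDependency | None"]     # 0.39.0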
@@ -131,12 +158,12 @@ class DatasetDependency:
         namespace_name: str,
         project_name: str,
         id: int,
-        dataset_id: Optional[int],
-        dataset_version_id: Optional[int],
-        dataset_name: Optional[str],
-        dataset_version: Optional[str],
-        dataset_version_created_at: Optional[datetime],
-    ) -> Optional["DatasetDependency"]:
+        dataset_id: int | None,
+        dataset_version_id: int | None,
+        dataset_name: str | None,
+        dataset_version: str | None,
+        dataset_version_created_at: datetime | None,
+    ) -> "DatasetDependency | None":
         from datachain.lib.listing import is_listing_dataset
 
         if not dataset_id:
@@ -198,17 +225,17 @@ class DatasetVersion:
     status: int
     feature_schema: dict
     created_at: datetime
-    finished_at: Optional[datetime]
+    finished_at: datetime | None
     error_message: str
     error_stack: str
     script_output: str
-    schema: dict[str, Union[SQLType, type[SQLType]]]
-    num_objects: Optional[int]
-    size: Optional[int]
-    _preview_data: Optional[Union[str, list[dict]]]
+    schema: dict[str, SQLType | type[SQLType]]
+    num_objects: int | None
+    size: int | None
+    _preview_data: str | list[dict] | None
     sources: str = ""
     query_script: str = ""
-    job_id: Optional[str] = None
+    job_id: str | None = None
 
     @classmethod
     def parse(  # noqa: PLR0913
@@ -218,20 +245,25 @@ class DatasetVersion:
         dataset_id: int,
         version: str,
         status: int,
-        feature_schema: Optional[str],
+        feature_schema: str | None,
         created_at: datetime,
-        finished_at: Optional[datetime],
+        finished_at: datetime | None,
         error_message: str,
         error_stack: str,
         script_output: str,
-        num_objects: Optional[int],
-        size: Optional[int],
-        preview: Optional[Union[str, list[dict]]],
-        schema: dict[str, Union[SQLType, type[SQLType]]],
+        num_objects: int | None,
+        size: int | None,
+        preview: str | list[dict] | None,
+        schema: str | dict[str, SQLType | type[SQLType]],
         sources: str = "",
         query_script: str = "",
-        job_id: Optional[str] = None,
+        job_id: str | None = None,
     ):
+        if isinstance(schema, str):
+            schema_parsed = parse_schema(json.loads(schema) if schema else {})
+        else:
+            schema_parsed = schema
+
         return cls(
             id,
             uuid,
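The practical effect: DatasetVersion.parse now accepts the schema either as the raw JSON string stored by the metastore or as an already-parsed dict, and defers parsing to the shared parse_schema helper. A standalone equivalent of the new branch (the stored JSON shape is an assumption):

    from datachain import json
    from datachain.dataset import parse_schema

    raw = '{"size": {"type": "Int64"}}'  # schema as persisted (assumed shape)
    schema_parsed = parse_schema(json.loads(raw) if raw else {})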
@@ -244,7 +276,7 @@ class DatasetVersion:
             error_message,
             error_stack,
             script_output,
-            schema,
+            schema_parsed,
             num_objects,
             size,
             preview,
@@ -292,7 +324,7 @@ class DatasetVersion:
         }
 
     @cached_property
-    def preview(self) -> Optional[list[dict]]:
+    def preview(self) -> list[dict] | None:
         if isinstance(self._preview_data, str):
             return json.loads(self._preview_data)
         return self._preview_data if self._preview_data else None
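The preview property keeps its lazy-decode behavior under the new return annotation. A self-contained sketch of the same logic:

    import json

    def preview(preview_data: str | list[dict] | None) -> list[dict] | None:
        # mirrors DatasetVersion.preview: decode JSON strings, pass lists
        # through, collapse empty values to None
        if isinstance(preview_data, str):
            return json.loads(preview_data)
        return preview_data if preview_data else None

    assert preview('[{"a": 1}]') == [{"a": 1}]
    assert preview([]) is None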
@@ -313,13 +345,13 @@ class DatasetListVersion:
     version: str
     status: int
     created_at: datetime
-    finished_at: Optional[datetime]
+    finished_at: datetime | None
     error_message: str
     error_stack: str
-    num_objects: Optional[int]
-    size: Optional[int]
+    num_objects: int | None
+    size: int | None
     query_script: str = ""
-    job_id: Optional[str] = None
+    job_id: str | None = None
 
     @classmethod
     def parse(
@@ -330,13 +362,13 @@ class DatasetListVersion:
         version: str,
         status: int,
         created_at: datetime,
-        finished_at: Optional[datetime],
+        finished_at: datetime | None,
         error_message: str,
         error_stack: str,
-        num_objects: Optional[int],
-        size: Optional[int],
+        num_objects: int | None,
+        size: int | None,
         query_script: str = "",
-        job_id: Optional[str] = None,
+        job_id: str | None = None,
         **kwargs,
     ):
         return cls(
@@ -368,14 +400,14 @@ class DatasetRecord:
     id: int
     name: str
     project: Project
-    description: Optional[str]
+    description: str | None
     attrs: list[str]
-    schema: dict[str, Union[SQLType, type[SQLType]]]
+    schema: dict[str, SQLType | type[SQLType]]
     feature_schema: dict
     versions: list[DatasetVersion]
     status: int = DatasetStatus.CREATED
-    created_at: Optional[datetime] = None
-    finished_at: Optional[datetime] = None
+    created_at: datetime | None = None
+    finished_at: datetime | None = None
     error_message: str = ""
     error_stack: str = ""
     script_output: str = ""
@@ -385,15 +417,6 @@ class DatasetRecord:
     def __hash__(self):
         return hash(f"{self.id}")
 
-    @staticmethod
-    def parse_schema(
-        ct: dict[str, Any],
-    ) -> dict[str, Union[SQLType, type[SQLType]]]:
-        return {
-            c_name: NAME_TYPES_MAPPING[c_type["type"]].from_dict(c_type)  # type: ignore [attr-defined]
-            for c_name, c_type in ct.items()
-        }
-
     @staticmethod
     def validate_name(name: str) -> None:
         """Throws exception if name has reserved characters"""
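For callers, this removal pairs with the module-level parse_schema added earlier in the file; a migration sketch (the "Int64" type name is an assumption):

    from datachain.dataset import parse_schema

    schema_dct = {"size": {"type": "Int64"}}
    schema = parse_schema(schema_dct)  # was: DatasetRecord.parse_schema(schema_dct)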
@@ -409,23 +432,23 @@ class DatasetRecord:
         namespace_id: int,
         namespace_uuid: str,
         namespace_name: str,
-        namespace_description: Optional[str],
+        namespace_description: str | None,
         namespace_created_at: datetime,
         project_id: int,
         project_uuid: str,
         project_name: str,
-        project_description: Optional[str],
+        project_description: str | None,
         project_created_at: datetime,
         project_namespace_id: int,
         dataset_id: int,
         dataset_project_id: int,
         name: str,
-        description: Optional[str],
+        description: str | None,
         attrs: str,
         status: int,
-        feature_schema: Optional[str],
+        feature_schema: str | None,
         created_at: datetime,
-        finished_at: Optional[datetime],
+        finished_at: datetime | None,
         error_message: str,
         error_stack: str,
         script_output: str,
@@ -437,25 +460,22 @@ class DatasetRecord:
         version_dataset_id: int,
         version: str,
         version_status: int,
-        version_feature_schema: Optional[str],
+        version_feature_schema: str | None,
         version_created_at: datetime,
-        version_finished_at: Optional[datetime],
+        version_finished_at: datetime | None,
         version_error_message: str,
         version_error_stack: str,
         version_script_output: str,
-        version_num_objects: Optional[int],
-        version_size: Optional[int],
-        version_preview: Optional[str],
-        version_sources: Optional[str],
-        version_query_script: Optional[str],
+        version_num_objects: int | None,
+        version_size: int | None,
+        version_preview: str | None,
+        version_sources: str | None,
+        version_query_script: str | None,
         version_schema: str,
-        version_job_id: Optional[str] = None,
+        version_job_id: str | None = None,
     ) -> "DatasetRecord":
         attrs_lst: list[str] = json.loads(attrs) if attrs else []
         schema_dct: dict[str, Any] = json.loads(schema) if schema else {}
-        version_schema_dct: dict[str, str] = (
-            json.loads(version_schema) if version_schema else {}
-        )
 
         namespace = Namespace(
             namespace_id,
@@ -489,7 +509,7 @@ class DatasetRecord:
             version_num_objects,
             version_size,
             version_preview,
-            cls.parse_schema(version_schema_dct),
+            version_schema,
             version_sources,  # type: ignore[arg-type]
             version_query_script,  # type: ignore[arg-type]
             version_job_id,
@@ -501,7 +521,7 @@ class DatasetRecord:
             project,
             description,
             attrs_lst,
-            cls.parse_schema(schema_dct),
+            parse_schema(schema_dct),  # type: ignore[arg-type]
             json.loads(feature_schema) if feature_schema else {},
             [dataset_version],
             status,
@@ -527,7 +547,7 @@ class DatasetRecord:
     def full_name(self) -> str:
         return f"{self.project.namespace.name}.{self.project.name}.{self.name}"
 
-    def get_schema(self, version: str) -> dict[str, Union[SQLType, type[SQLType]]]:
+    def get_schema(self, version: str) -> dict[str, SQLType | type[SQLType]]:
         return self.get_version(version).schema if version else self.schema
 
     def update(self, **kwargs):
@@ -619,7 +639,7 @@ class DatasetRecord:
         if not self.versions:
             return "1.0.0"
 
-        major, minor, patch = semver.parse(self.latest_version)
+        major, _, _ = semver.parse(self.latest_version)
        return semver.create(major + 1, 0, 0)
 
     @property
@@ -630,7 +650,7 @@ class DatasetRecord:
         if not self.versions:
             return "1.0.0"
 
-        major, minor, patch = semver.parse(self.latest_version)
+        major, minor, _ = semver.parse(self.latest_version)
         return semver.create(major, minor + 1, 0)
 
     @property
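The two hunks above appear to only rename unused unpacking targets to `_`, leaving the bump logic intact. A sketch of that logic, assuming semver.parse returns a (major, minor, patch) tuple of ints and semver.create formats one back into an "X.Y.Z" string:

    from datachain import semver

    major, _, _ = semver.parse("1.4.1")
    print(semver.create(major + 1, 0, 0))      # 2.0.0 (next_version_major)

    major, minor, _ = semver.parse("1.4.1")
    print(semver.create(major, minor + 1, 0))  # 1.5.0 (next_version_minor)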
@@ -649,7 +669,7 @@ class DatasetRecord:
         """Returns latest version of a dataset"""
         return max(self.versions).version
 
-    def latest_major_version(self, major: int) -> Optional[str]:
+    def latest_major_version(self, major: int) -> str | None:
         """
         Returns latest specific major version, e.g if dataset has versions:
             - 1.4.1
@@ -664,7 +684,7 @@ class DatasetRecord:
             return None
         return max(versions).version
 
-    def latest_compatible_version(self, version_spec: str) -> Optional[str]:
+    def latest_compatible_version(self, version_spec: str) -> str | None:
         """
         Returns the latest version that matches the given version specifier.
 
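latest_compatible_version builds on the packaging imports at the top of the file. A hedged sketch of the kind of matching it presumably performs with SpecifierSet:

    from packaging.specifiers import SpecifierSet
    from packaging.version import Version

    spec = SpecifierSet(">=1.0.0,<2.0.0")
    available = ["0.9.0", "1.4.1", "2.0.0"]
    compatible = [v for v in available if Version(v) in spec]
    print(max(compatible, key=Version))  # 1.4.1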
@@ -711,10 +731,10 @@ class DatasetListRecord:
     id: int
     name: str
     project: Project
-    description: Optional[str]
+    description: str | None
     attrs: list[str]
     versions: list[DatasetListVersion]
-    created_at: Optional[datetime] = None
+    created_at: datetime | None = None
 
     @classmethod
     def parse(  # noqa: PLR0913
@@ -722,17 +742,17 @@ class DatasetListRecord:
         namespace_id: int,
         namespace_uuid: str,
         namespace_name: str,
-        namespace_description: Optional[str],
+        namespace_description: str | None,
         namespace_created_at: datetime,
         project_id: int,
         project_uuid: str,
         project_name: str,
-        project_description: Optional[str],
+        project_description: str | None,
         project_created_at: datetime,
         project_namespace_id: int,
         dataset_id: int,
         name: str,
-        description: Optional[str],
+        description: str | None,
         attrs: str,
         created_at: datetime,
         version_id: int,
@@ -741,13 +761,13 @@ class DatasetListRecord:
         version: str,
         version_status: int,
         version_created_at: datetime,
-        version_finished_at: Optional[datetime],
+        version_finished_at: datetime | None,
         version_error_message: str,
         version_error_stack: str,
-        version_num_objects: Optional[int],
-        version_size: Optional[int],
-        version_query_script: Optional[str],
-        version_job_id: Optional[str] = None,
+        version_num_objects: int | None,
+        version_size: int | None,
+        version_query_script: str | None,
+        version_job_id: str | None = None,
     ) -> "DatasetListRecord":
         attrs_lst: list[str] = json.loads(attrs) if attrs else []
 
@@ -824,7 +844,7 @@ class DatasetListRecord:
         from datachain.client import Client
 
         # TODO refactor and maybe remove method in
-        # https://github.com/iterative/datachain/issues/318
+        # https://github.com/datachain-ai/datachain/issues/318
         return Client.is_data_source_uri(self.name) or self.name.startswith(
             LISTING_PREFIX
         )