datachain 0.22.0__py3-none-any.whl → 0.24.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- datachain/catalog/catalog.py +58 -13
- datachain/cli/commands/datasets.py +4 -10
- datachain/data_storage/metastore.py +13 -2
- datachain/data_storage/sqlite.py +6 -2
- datachain/dataset.py +37 -6
- datachain/lib/dc/datachain.py +6 -12
- datachain/lib/dc/datasets.py +60 -44
- datachain/lib/dc/listings.py +2 -6
- datachain/lib/dc/records.py +1 -1
- datachain/lib/projects.py +1 -1
- datachain/lib/signal_schema.py +8 -0
- datachain/query/dataset.py +2 -8
- datachain/remote/studio.py +4 -3
- {datachain-0.22.0.dist-info → datachain-0.24.0.dist-info}/METADATA +1 -1
- {datachain-0.22.0.dist-info → datachain-0.24.0.dist-info}/RECORD +19 -19
- {datachain-0.22.0.dist-info → datachain-0.24.0.dist-info}/WHEEL +0 -0
- {datachain-0.22.0.dist-info → datachain-0.24.0.dist-info}/entry_points.txt +0 -0
- {datachain-0.22.0.dist-info → datachain-0.24.0.dist-info}/licenses/LICENSE +0 -0
- {datachain-0.22.0.dist-info → datachain-0.24.0.dist-info}/top_level.txt +0 -0
datachain/catalog/catalog.py
CHANGED
|
@@ -49,6 +49,7 @@ from datachain.error import (
|
|
|
49
49
|
DatasetInvalidVersionError,
|
|
50
50
|
DatasetNotFoundError,
|
|
51
51
|
DatasetVersionNotFoundError,
|
|
52
|
+
NamespaceNotFoundError,
|
|
52
53
|
ProjectNotFoundError,
|
|
53
54
|
QueryScriptCancelError,
|
|
54
55
|
QueryScriptRunError,
|
|
@@ -1059,6 +1060,39 @@ class Catalog:
|
|
|
1059
1060
|
|
|
1060
1061
|
return self.get_dataset(name, project)
|
|
1061
1062
|
|
|
1063
|
+
def get_full_dataset_name(
|
|
1064
|
+
self,
|
|
1065
|
+
name: str,
|
|
1066
|
+
project_name: Optional[str] = None,
|
|
1067
|
+
namespace_name: Optional[str] = None,
|
|
1068
|
+
) -> tuple[str, str, str]:
|
|
1069
|
+
"""
|
|
1070
|
+
Returns dataset name together with separated namespace and project name.
|
|
1071
|
+
It takes into account all the ways namespace and project can be added.
|
|
1072
|
+
"""
|
|
1073
|
+
parsed_namespace_name, parsed_project_name, name = parse_dataset_name(name)
|
|
1074
|
+
|
|
1075
|
+
namespace_env = os.environ.get("DATACHAIN_NAMESPACE")
|
|
1076
|
+
project_env = os.environ.get("DATACHAIN_PROJECT")
|
|
1077
|
+
if project_env and len(project_env.split(".")) == 2:
|
|
1078
|
+
# we allow setting both namespace and project in DATACHAIN_PROJECT
|
|
1079
|
+
namespace_env, project_env = project_env.split(".")
|
|
1080
|
+
|
|
1081
|
+
namespace_name = (
|
|
1082
|
+
parsed_namespace_name
|
|
1083
|
+
or namespace_name
|
|
1084
|
+
or namespace_env
|
|
1085
|
+
or self.metastore.default_namespace_name
|
|
1086
|
+
)
|
|
1087
|
+
project_name = (
|
|
1088
|
+
parsed_project_name
|
|
1089
|
+
or project_name
|
|
1090
|
+
or project_env
|
|
1091
|
+
or self.metastore.default_project_name
|
|
1092
|
+
)
|
|
1093
|
+
|
|
1094
|
+
return namespace_name, project_name, name
|
|
1095
|
+
|
|
1062
1096
|
def get_dataset(
|
|
1063
1097
|
self, name: str, project: Optional[Project] = None
|
|
1064
1098
|
) -> DatasetRecord:
|
|
@@ -1074,21 +1108,26 @@ class Catalog:
|
|
|
1074
1108
|
namespace_name: str,
|
|
1075
1109
|
project_name: str,
|
|
1076
1110
|
version: Optional[str] = None,
|
|
1111
|
+
pull_dataset: bool = False,
|
|
1112
|
+
update: bool = False,
|
|
1077
1113
|
) -> DatasetRecord:
|
|
1078
|
-
|
|
1079
|
-
|
|
1080
|
-
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
-
|
|
1114
|
+
if self.metastore.is_local_dataset(namespace_name) or not update:
|
|
1115
|
+
try:
|
|
1116
|
+
project = self.metastore.get_project(project_name, namespace_name)
|
|
1117
|
+
ds = self.get_dataset(name, project)
|
|
1118
|
+
if not version or ds.has_version(version):
|
|
1119
|
+
return ds
|
|
1120
|
+
except (NamespaceNotFoundError, ProjectNotFoundError, DatasetNotFoundError):
|
|
1121
|
+
pass
|
|
1122
|
+
|
|
1123
|
+
if self.metastore.is_local_dataset(namespace_name):
|
|
1124
|
+
raise DatasetNotFoundError(
|
|
1125
|
+
f"Dataset {name}"
|
|
1126
|
+
+ (f" version {version} " if version else " ")
|
|
1127
|
+
+ "not found"
|
|
1128
|
+
)
|
|
1086
1129
|
|
|
1087
|
-
|
|
1088
|
-
ProjectNotFoundError,
|
|
1089
|
-
DatasetNotFoundError,
|
|
1090
|
-
DatasetVersionNotFoundError,
|
|
1091
|
-
):
|
|
1130
|
+
if pull_dataset:
|
|
1092
1131
|
print("Dataset not found in local catalog, trying to get from studio")
|
|
1093
1132
|
remote_ds_uri = create_dataset_uri(
|
|
1094
1133
|
name, namespace_name, project_name, version
|
|
@@ -1103,6 +1142,8 @@ class Catalog:
|
|
|
1103
1142
|
name, self.metastore.get_project(project_name, namespace_name)
|
|
1104
1143
|
)
|
|
1105
1144
|
|
|
1145
|
+
return self.get_remote_dataset(namespace_name, project_name, name)
|
|
1146
|
+
|
|
1106
1147
|
def get_dataset_with_version_uuid(self, uuid: str) -> DatasetRecord:
|
|
1107
1148
|
"""Returns dataset that contains version with specific uuid"""
|
|
1108
1149
|
for dataset in self.ls_datasets():
|
|
@@ -1119,6 +1160,10 @@ class Catalog:
|
|
|
1119
1160
|
|
|
1120
1161
|
info_response = studio_client.dataset_info(namespace, project, name)
|
|
1121
1162
|
if not info_response.ok:
|
|
1163
|
+
if info_response.status == 404:
|
|
1164
|
+
raise DatasetNotFoundError(
|
|
1165
|
+
f"Dataset {namespace}.{project}.{name} not found"
|
|
1166
|
+
)
|
|
1122
1167
|
raise DataChainError(info_response.message)
|
|
1123
1168
|
|
|
1124
1169
|
dataset_info = info_response.data
|
|
datachain/cli/commands/datasets.py
CHANGED
|
@@ -8,7 +8,6 @@ if TYPE_CHECKING:
|
|
|
8
8
|
|
|
9
9
|
from datachain.cli.utils import determine_flavors
|
|
10
10
|
from datachain.config import Config
|
|
11
|
-
from datachain.dataset import parse_dataset_name
|
|
12
11
|
from datachain.error import DataChainError, DatasetNotFoundError
|
|
13
12
|
from datachain.studio import list_datasets as list_datasets_studio
|
|
14
13
|
|
|
@@ -106,9 +105,8 @@ def list_datasets_local(catalog: "Catalog", name: Optional[str] = None):
|
|
|
106
105
|
|
|
107
106
|
|
|
108
107
|
def list_datasets_local_versions(catalog: "Catalog", name: str):
|
|
109
|
-
namespace_name, project_name, name =
|
|
110
|
-
|
|
111
|
-
project_name = project_name or catalog.metastore.default_project_name
|
|
108
|
+
namespace_name, project_name, name = catalog.get_full_dataset_name(name)
|
|
109
|
+
|
|
112
110
|
project = catalog.metastore.get_project(project_name, namespace_name)
|
|
113
111
|
ds = catalog.get_dataset(name, project)
|
|
114
112
|
for v in ds.versions:
|
|
@@ -137,9 +135,7 @@ def rm_dataset(
|
|
|
137
135
|
studio: Optional[bool] = False,
|
|
138
136
|
team: Optional[str] = None,
|
|
139
137
|
):
|
|
140
|
-
namespace_name, project_name, name =
|
|
141
|
-
namespace_name = namespace_name or catalog.metastore.default_namespace_name
|
|
142
|
-
project_name = project_name or catalog.metastore.default_project_name
|
|
138
|
+
namespace_name, project_name, name = catalog.get_full_dataset_name(name)
|
|
143
139
|
|
|
144
140
|
if not catalog.metastore.is_local_dataset(namespace_name) and studio:
|
|
145
141
|
from datachain.studio import remove_studio_dataset
|
|
@@ -166,9 +162,7 @@ def edit_dataset(
|
|
|
166
162
|
attrs: Optional[list[str]] = None,
|
|
167
163
|
team: Optional[str] = None,
|
|
168
164
|
):
|
|
169
|
-
namespace_name, project_name, name =
|
|
170
|
-
namespace_name = namespace_name or catalog.metastore.default_namespace_name
|
|
171
|
-
project_name = project_name or catalog.metastore.default_project_name
|
|
165
|
+
namespace_name, project_name, name = catalog.get_full_dataset_name(name)
|
|
172
166
|
|
|
173
167
|
if catalog.metastore.is_local_dataset(namespace_name):
|
|
174
168
|
try:
|
|
datachain/data_storage/metastore.py
CHANGED
|
@@ -132,6 +132,7 @@ class AbstractMetastore(ABC, Serializable):
|
|
|
132
132
|
description: Optional[str] = None,
|
|
133
133
|
uuid: Optional[str] = None,
|
|
134
134
|
ignore_if_exists: bool = True,
|
|
135
|
+
validate: bool = True,
|
|
135
136
|
**kwargs,
|
|
136
137
|
) -> Namespace:
|
|
137
138
|
"""Creates new namespace"""
|
|
@@ -192,6 +193,7 @@ class AbstractMetastore(ABC, Serializable):
|
|
|
192
193
|
description: Optional[str] = None,
|
|
193
194
|
uuid: Optional[str] = None,
|
|
194
195
|
ignore_if_exists: bool = True,
|
|
196
|
+
validate: bool = True,
|
|
195
197
|
**kwargs,
|
|
196
198
|
) -> Project:
|
|
197
199
|
"""Creates new project in specific namespace"""
|
|
@@ -725,8 +727,11 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
725
727
|
description: Optional[str] = None,
|
|
726
728
|
uuid: Optional[str] = None,
|
|
727
729
|
ignore_if_exists: bool = True,
|
|
730
|
+
validate: bool = True,
|
|
728
731
|
**kwargs,
|
|
729
732
|
) -> Namespace:
|
|
733
|
+
if validate:
|
|
734
|
+
Namespace.validate_name(name)
|
|
730
735
|
query = self._namespaces_insert().values(
|
|
731
736
|
name=name,
|
|
732
737
|
uuid=uuid or str(uuid4()),
|
|
@@ -775,12 +780,15 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
775
780
|
description: Optional[str] = None,
|
|
776
781
|
uuid: Optional[str] = None,
|
|
777
782
|
ignore_if_exists: bool = True,
|
|
783
|
+
validate: bool = True,
|
|
778
784
|
**kwargs,
|
|
779
785
|
) -> Project:
|
|
786
|
+
if validate:
|
|
787
|
+
Project.validate_name(name)
|
|
780
788
|
try:
|
|
781
789
|
namespace = self.get_namespace(namespace_name)
|
|
782
790
|
except NamespaceNotFoundError:
|
|
783
|
-
namespace = self.create_namespace(namespace_name)
|
|
791
|
+
namespace = self.create_namespace(namespace_name, validate=validate)
|
|
784
792
|
|
|
785
793
|
query = self._projects_insert().values(
|
|
786
794
|
namespace_id=namespace.id,
|
|
@@ -817,11 +825,14 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
817
825
|
"""Gets a single project inside some namespace by name"""
|
|
818
826
|
n = self._namespaces
|
|
819
827
|
p = self._projects
|
|
828
|
+
validate = True
|
|
829
|
+
|
|
820
830
|
if self._is_listing_project(name, namespace_name) or self._is_default_project(
|
|
821
831
|
name, namespace_name
|
|
822
832
|
):
|
|
823
833
|
# we are always creating default and listing projects if they don't exist
|
|
824
834
|
create = True
|
|
835
|
+
validate = False
|
|
825
836
|
|
|
826
837
|
query = self._projects_select(
|
|
827
838
|
*(getattr(n.c, f) for f in self._namespaces_fields),
|
|
@@ -834,7 +845,7 @@ class AbstractDBMetastore(AbstractMetastore):
|
|
|
834
845
|
rows = list(self.db.execute(query, conn=conn))
|
|
835
846
|
if not rows:
|
|
836
847
|
if create:
|
|
837
|
-
return self.create_project(namespace_name, name)
|
|
848
|
+
return self.create_project(namespace_name, name, validate=validate)
|
|
838
849
|
raise ProjectNotFoundError(
|
|
839
850
|
f"Project {name} in namespace {namespace_name} not found."
|
|
840
851
|
)
|
datachain/data_storage/sqlite.py
CHANGED
|
@@ -468,8 +468,12 @@ class SQLiteMetastore(AbstractDBMetastore):
|
|
|
468
468
|
be created implicitly though, to keep the same fully qualified name with
|
|
469
469
|
Studio dataset.
|
|
470
470
|
"""
|
|
471
|
-
system_namespace = self.create_namespace(
|
|
472
|
-
|
|
471
|
+
system_namespace = self.create_namespace(
|
|
472
|
+
Namespace.system(), "System namespace", validate=False
|
|
473
|
+
)
|
|
474
|
+
self.create_project(
|
|
475
|
+
system_namespace.name, Project.listing(), "Listing project", validate=False
|
|
476
|
+
)
|
|
473
477
|
|
|
474
478
|
def _check_schema_version(self) -> None:
|
|
475
479
|
"""
|
datachain/dataset.py
CHANGED
|
@@ -12,6 +12,9 @@ from typing import (
|
|
|
12
12
|
)
|
|
13
13
|
from urllib.parse import urlparse
|
|
14
14
|
|
|
15
|
+
from packaging.specifiers import SpecifierSet
|
|
16
|
+
from packaging.version import Version
|
|
17
|
+
|
|
15
18
|
from datachain import semver
|
|
16
19
|
from datachain.error import DatasetVersionNotFoundError, InvalidDatasetNameError
|
|
17
20
|
from datachain.namespace import Namespace
|
|
@@ -81,8 +84,10 @@ def create_dataset_uri(
|
|
|
81
84
|
def parse_dataset_name(name: str) -> tuple[Optional[str], Optional[str], str]:
|
|
82
85
|
"""Parses dataset name and returns namespace, project and name"""
|
|
83
86
|
if not name:
|
|
84
|
-
raise
|
|
87
|
+
raise InvalidDatasetNameError("Name must be defined to parse it")
|
|
85
88
|
split = name.split(".")
|
|
89
|
+
if len(split) > 3:
|
|
90
|
+
raise InvalidDatasetNameError(f"Invalid dataset name {name}")
|
|
86
91
|
name = split[-1]
|
|
87
92
|
project_name = split[-2] if len(split) > 1 else None
|
|
88
93
|
namespace_name = split[-3] if len(split) > 2 else None
|
|
@@ -659,13 +664,39 @@ class DatasetRecord:
|
|
|
659
664
|
return None
|
|
660
665
|
return max(versions).version
|
|
661
666
|
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
667
|
+
def latest_compatible_version(self, version_spec: str) -> Optional[str]:
|
|
668
|
+
"""
|
|
669
|
+
Returns the latest version that matches the given version specifier.
|
|
670
|
+
|
|
671
|
+
Supports Python version specifiers like:
|
|
672
|
+
- ">=1.0.0,<2.0.0" (compatible release range)
|
|
673
|
+
- "~=1.4.2" (compatible release clause)
|
|
674
|
+
- "==1.2.*" (prefix matching)
|
|
675
|
+
- ">1.0.0" (exclusive ordered comparison)
|
|
676
|
+
- ">=1.0.0" (inclusive ordered comparison)
|
|
677
|
+
- "!=1.3.0" (version exclusion)
|
|
678
|
+
|
|
679
|
+
Args:
|
|
680
|
+
version_spec: Version specifier string following PEP 440
|
|
681
|
+
|
|
682
|
+
Returns:
|
|
683
|
+
Latest compatible version string, or None if no compatible version found
|
|
684
|
+
"""
|
|
685
|
+
spec_set = SpecifierSet(version_spec)
|
|
686
|
+
|
|
687
|
+
# Convert dataset versions to packaging.Version objects
|
|
688
|
+
# and filter compatible ones
|
|
689
|
+
compatible_versions = []
|
|
690
|
+
for v in self.versions:
|
|
691
|
+
pkg_version = Version(v.version)
|
|
692
|
+
if spec_set.contains(pkg_version):
|
|
693
|
+
compatible_versions.append(v)
|
|
694
|
+
|
|
695
|
+
if not compatible_versions:
|
|
666
696
|
return None
|
|
667
697
|
|
|
668
|
-
|
|
698
|
+
# Return the latest compatible version
|
|
699
|
+
return max(compatible_versions).version
|
|
669
700
|
|
|
670
701
|
@classmethod
|
|
671
702
|
def from_dict(cls, d: dict[str, Any]) -> "DatasetRecord":
|
datachain/lib/dc/datachain.py
CHANGED
|
@@ -24,7 +24,7 @@ from pydantic import BaseModel
|
|
|
24
24
|
from tqdm import tqdm
|
|
25
25
|
|
|
26
26
|
from datachain import semver
|
|
27
|
-
from datachain.dataset import DatasetRecord
|
|
27
|
+
from datachain.dataset import DatasetRecord
|
|
28
28
|
from datachain.delta import delta_disabled
|
|
29
29
|
from datachain.error import ProjectCreateNotAllowedError, ProjectNotFoundError
|
|
30
30
|
from datachain.func import literal
|
|
@@ -557,6 +557,7 @@ class DataChain:
|
|
|
557
557
|
update_version: which part of the dataset version to automatically increase.
|
|
558
558
|
Available values: `major`, `minor` or `patch`. Default is `patch`.
|
|
559
559
|
"""
|
|
560
|
+
catalog = self.session.catalog
|
|
560
561
|
if version is not None:
|
|
561
562
|
semver.validate(version)
|
|
562
563
|
|
|
@@ -570,17 +571,10 @@ class DataChain:
|
|
|
570
571
|
" patch"
|
|
571
572
|
)
|
|
572
573
|
|
|
573
|
-
namespace_name, project_name, name =
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
or self._settings.namespace
|
|
578
|
-
or self.session.catalog.metastore.default_namespace_name
|
|
579
|
-
)
|
|
580
|
-
project_name = (
|
|
581
|
-
project_name
|
|
582
|
-
or self._settings.project
|
|
583
|
-
or self.session.catalog.metastore.default_project_name
|
|
574
|
+
namespace_name, project_name, name = catalog.get_full_dataset_name(
|
|
575
|
+
name,
|
|
576
|
+
namespace_name=self._settings.namespace,
|
|
577
|
+
project_name=self._settings.project,
|
|
584
578
|
)
|
|
585
579
|
|
|
586
580
|
try:
|
datachain/lib/dc/datasets.py
CHANGED
|
@@ -1,16 +1,12 @@
|
|
|
1
1
|
from collections.abc import Sequence
|
|
2
2
|
from typing import TYPE_CHECKING, Optional, Union, get_origin, get_type_hints
|
|
3
3
|
|
|
4
|
-
from datachain.dataset import parse_dataset_name
|
|
5
4
|
from datachain.error import (
|
|
6
5
|
DatasetNotFoundError,
|
|
7
6
|
DatasetVersionNotFoundError,
|
|
8
7
|
ProjectNotFoundError,
|
|
9
8
|
)
|
|
10
9
|
from datachain.lib.dataset_info import DatasetInfo
|
|
11
|
-
from datachain.lib.file import (
|
|
12
|
-
File,
|
|
13
|
-
)
|
|
14
10
|
from datachain.lib.projects import get as get_project
|
|
15
11
|
from datachain.lib.settings import Settings
|
|
16
12
|
from datachain.lib.signal_schema import SignalSchema
|
|
@@ -35,7 +31,6 @@ def read_dataset(
|
|
|
35
31
|
version: Optional[Union[str, int]] = None,
|
|
36
32
|
session: Optional[Session] = None,
|
|
37
33
|
settings: Optional[dict] = None,
|
|
38
|
-
fallback_to_studio: bool = True,
|
|
39
34
|
delta: Optional[bool] = False,
|
|
40
35
|
delta_on: Optional[Union[str, Sequence[str]]] = (
|
|
41
36
|
"file.path",
|
|
@@ -45,6 +40,7 @@ def read_dataset(
|
|
|
45
40
|
delta_result_on: Optional[Union[str, Sequence[str]]] = None,
|
|
46
41
|
delta_compare: Optional[Union[str, Sequence[str]]] = None,
|
|
47
42
|
delta_retry: Optional[Union[bool, str]] = None,
|
|
43
|
+
update: bool = False,
|
|
48
44
|
) -> "DataChain":
|
|
49
45
|
"""Get data from a saved Dataset. It returns the chain itself.
|
|
50
46
|
If dataset or version is not found locally, it will try to pull it from Studio.
|
|
@@ -56,11 +52,12 @@ def read_dataset(
|
|
|
56
52
|
set; otherwise, default values will be applied.
|
|
57
53
|
namespace : optional name of namespace in which dataset to read is created
|
|
58
54
|
project : optional name of project in which dataset to read is created
|
|
59
|
-
version : dataset version
|
|
55
|
+
version : dataset version. Supports:
|
|
56
|
+
- Exact version strings: "1.2.3"
|
|
57
|
+
- Legacy integer versions: 1, 2, 3 (finds latest major version)
|
|
58
|
+
- Version specifiers (PEP 440): ">=1.0.0,<2.0.0", "~=1.4.2", "==1.2.*", etc.
|
|
60
59
|
session : Session to use for the chain.
|
|
61
60
|
settings : Settings to use for the chain.
|
|
62
|
-
fallback_to_studio : Try to pull dataset from Studio if not found locally.
|
|
63
|
-
Default is True.
|
|
64
61
|
delta: If True, only process new or changed files instead of reprocessing
|
|
65
62
|
everything. This saves time by skipping files that were already processed in
|
|
66
63
|
previous versions. The optimization is working when a new version of the
|
|
@@ -80,6 +77,10 @@ def read_dataset(
|
|
|
80
77
|
(error mode)
|
|
81
78
|
- True: Reprocess records missing from the result dataset (missing mode)
|
|
82
79
|
- None: No retry processing (default)
|
|
80
|
+
update: If True always checks for newer versions available on Studio, even if
|
|
81
|
+
some version of the dataset exists locally already. If False (default), it
|
|
82
|
+
will only fetch the dataset from Studio if it is not found locally.
|
|
83
|
+
|
|
83
84
|
|
|
84
85
|
Example:
|
|
85
86
|
```py
|
|
@@ -93,11 +94,22 @@ def read_dataset(
|
|
|
93
94
|
```
|
|
94
95
|
|
|
95
96
|
```py
|
|
96
|
-
chain = dc.read_dataset("my_cats",
|
|
97
|
+
chain = dc.read_dataset("my_cats", version="1.0.0")
|
|
97
98
|
```
|
|
98
99
|
|
|
99
100
|
```py
|
|
100
|
-
|
|
101
|
+
# Using version specifiers (PEP 440)
|
|
102
|
+
chain = dc.read_dataset("my_cats", version=">=1.0.0,<2.0.0")
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
```py
|
|
106
|
+
# Legacy integer version support (finds latest in major version)
|
|
107
|
+
chain = dc.read_dataset("my_cats", version=1) # Latest 1.x.x version
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
```py
|
|
111
|
+
# Always check for newer versions matching a version specifier from Studio
|
|
112
|
+
chain = dc.read_dataset("my_cats", version=">=1.0.0", update=True)
|
|
101
113
|
```
|
|
102
114
|
|
|
103
115
|
```py
|
|
@@ -114,7 +126,6 @@ def read_dataset(
|
|
|
114
126
|
version="1.0.0",
|
|
115
127
|
session=session,
|
|
116
128
|
settings=settings,
|
|
117
|
-
fallback_to_studio=True,
|
|
118
129
|
)
|
|
119
130
|
```
|
|
120
131
|
"""
|
|
@@ -122,41 +133,49 @@ def read_dataset(
|
|
|
122
133
|
|
|
123
134
|
from .datachain import DataChain
|
|
124
135
|
|
|
136
|
+
telemetry.send_event_once("class", "datachain_init", name=name, version=version)
|
|
137
|
+
|
|
125
138
|
session = Session.get(session)
|
|
126
139
|
catalog = session.catalog
|
|
127
140
|
|
|
128
|
-
namespace_name, project_name, name =
|
|
129
|
-
|
|
130
|
-
|
|
141
|
+
namespace_name, project_name, name = catalog.get_full_dataset_name(
|
|
142
|
+
name,
|
|
143
|
+
project_name=project,
|
|
144
|
+
namespace_name=namespace,
|
|
131
145
|
)
|
|
132
|
-
project_name = project_name or project or catalog.metastore.default_project_name
|
|
133
146
|
|
|
134
147
|
if version is not None:
|
|
148
|
+
dataset = session.catalog.get_dataset_with_remote_fallback(
|
|
149
|
+
name, namespace_name, project_name, update=update
|
|
150
|
+
)
|
|
151
|
+
|
|
152
|
+
# Convert legacy integer versions to version specifiers
|
|
153
|
+
# For backward compatibility we still allow users to put version as integer
|
|
154
|
+
# in which case we convert it to a version specifier that finds the latest
|
|
155
|
+
# version where major part is equal to that input version.
|
|
156
|
+
# For example if user sets version=2, we convert it to ">=2.0.0,<3.0.0"
|
|
157
|
+
# which will find something like 2.4.3 (assuming 2.4.3 is the biggest among
|
|
158
|
+
# all 2.* dataset versions)
|
|
159
|
+
if isinstance(version, int):
|
|
160
|
+
version_spec = f">={version}.0.0,<{version + 1}.0.0"
|
|
161
|
+
else:
|
|
162
|
+
version_spec = str(version)
|
|
163
|
+
|
|
164
|
+
from packaging.specifiers import InvalidSpecifier, SpecifierSet
|
|
165
|
+
|
|
135
166
|
try:
|
|
136
|
-
#
|
|
137
|
-
|
|
138
|
-
#
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
# major part is equal to that input, exception is thrown.
|
|
142
|
-
major = int(version)
|
|
143
|
-
try:
|
|
144
|
-
ds_project = get_project(project_name, namespace_name, session=session)
|
|
145
|
-
except ProjectNotFoundError:
|
|
146
|
-
raise DatasetNotFoundError(
|
|
147
|
-
f"Dataset {name} not found in namespace {namespace_name} and",
|
|
148
|
-
f" project {project_name}",
|
|
149
|
-
) from None
|
|
150
|
-
|
|
151
|
-
dataset = session.catalog.get_dataset(name, ds_project)
|
|
152
|
-
latest_major = dataset.latest_major_version(major)
|
|
153
|
-
if not latest_major:
|
|
167
|
+
# Try to parse as version specifier
|
|
168
|
+
SpecifierSet(version_spec)
|
|
169
|
+
# If it's a valid specifier set, find the latest compatible version
|
|
170
|
+
latest_compatible = dataset.latest_compatible_version(version_spec)
|
|
171
|
+
if not latest_compatible:
|
|
154
172
|
raise DatasetVersionNotFoundError(
|
|
155
|
-
f"
|
|
173
|
+
f"No dataset {name} version matching specifier {version_spec}"
|
|
156
174
|
)
|
|
157
|
-
version =
|
|
158
|
-
except
|
|
159
|
-
#
|
|
175
|
+
version = latest_compatible
|
|
176
|
+
except InvalidSpecifier:
|
|
177
|
+
# If not a valid specifier, treat as exact version string
|
|
178
|
+
# This handles cases like "1.2.3" which are exact versions, not specifiers
|
|
160
179
|
pass
|
|
161
180
|
|
|
162
181
|
if settings:
|
|
@@ -170,11 +189,8 @@ def read_dataset(
|
|
|
170
189
|
namespace_name=namespace_name,
|
|
171
190
|
version=version, # type: ignore[arg-type]
|
|
172
191
|
session=session,
|
|
173
|
-
indexing_column_types=File._datachain_column_types,
|
|
174
|
-
fallback_to_studio=fallback_to_studio,
|
|
175
192
|
)
|
|
176
193
|
|
|
177
|
-
telemetry.send_event_once("class", "datachain_init", name=name, version=version)
|
|
178
194
|
signals_schema = SignalSchema({"sys": Sys})
|
|
179
195
|
if query.feature_schema:
|
|
180
196
|
signals_schema |= SignalSchema.deserialize(query.feature_schema)
|
|
@@ -320,11 +336,11 @@ def delete_dataset(
|
|
|
320
336
|
session = Session.get(session, in_memory=in_memory)
|
|
321
337
|
catalog = session.catalog
|
|
322
338
|
|
|
323
|
-
namespace_name, project_name, name =
|
|
324
|
-
|
|
325
|
-
|
|
339
|
+
namespace_name, project_name, name = catalog.get_full_dataset_name(
|
|
340
|
+
name,
|
|
341
|
+
project_name=project,
|
|
342
|
+
namespace_name=namespace,
|
|
326
343
|
)
|
|
327
|
-
project_name = project_name or project or catalog.metastore.default_project_name
|
|
328
344
|
|
|
329
345
|
if not catalog.metastore.is_local_dataset(namespace_name) and studio:
|
|
330
346
|
return remove_studio_dataset(
|
datachain/lib/dc/listings.py
CHANGED
|
@@ -127,12 +127,8 @@ def read_listing_dataset(
|
|
|
127
127
|
if version is None:
|
|
128
128
|
version = dataset.latest_version
|
|
129
129
|
|
|
130
|
-
query = DatasetQuery(
|
|
131
|
-
|
|
132
|
-
session=session,
|
|
133
|
-
indexing_column_types=File._datachain_column_types,
|
|
134
|
-
fallback_to_studio=False,
|
|
135
|
-
)
|
|
130
|
+
query = DatasetQuery(name=name, session=session)
|
|
131
|
+
|
|
136
132
|
if settings:
|
|
137
133
|
cfg = {**settings}
|
|
138
134
|
if "prefetch" not in cfg:
|
datachain/lib/dc/records.py
CHANGED
|
@@ -97,4 +97,4 @@ def read_records(
|
|
|
97
97
|
for chunk in batched(records, INSERT_BATCH_SIZE):
|
|
98
98
|
warehouse.insert_rows(table, chunk)
|
|
99
99
|
warehouse.insert_rows_done(table)
|
|
100
|
-
return read_dataset(name=dsr.
|
|
100
|
+
return read_dataset(name=dsr.full_name, session=session, settings=settings)
|
datachain/lib/projects.py
CHANGED
|
@@ -54,7 +54,7 @@ def get(name: str, namespace: str, session: Optional[Session]) -> Project:
|
|
|
54
54
|
```py
|
|
55
55
|
import datachain as dc
|
|
56
56
|
from datachain.lib.projects import get as get_project
|
|
57
|
-
project
|
|
57
|
+
project = get_project("my-project", "local")
|
|
58
58
|
```
|
|
59
59
|
"""
|
|
60
60
|
return Session.get(session).catalog.metastore.get_project(name, namespace)
|
datachain/lib/signal_schema.py
CHANGED
|
@@ -25,6 +25,7 @@ from pydantic import BaseModel, Field, create_model
|
|
|
25
25
|
from sqlalchemy import ColumnElement
|
|
26
26
|
from typing_extensions import Literal as LiteralEx
|
|
27
27
|
|
|
28
|
+
from datachain.func import literal
|
|
28
29
|
from datachain.func.func import Func
|
|
29
30
|
from datachain.lib.convert.python_to_sql import python_to_sql
|
|
30
31
|
from datachain.lib.convert.sql_to_python import sql_to_python
|
|
@@ -659,6 +660,7 @@ class SignalSchema:
|
|
|
659
660
|
|
|
660
661
|
def mutate(self, args_map: dict) -> "SignalSchema":
|
|
661
662
|
new_values = self.values.copy()
|
|
663
|
+
primitives = (bool, str, int, float)
|
|
662
664
|
|
|
663
665
|
for name, value in args_map.items():
|
|
664
666
|
if isinstance(value, Column) and value.name in self.values:
|
|
@@ -679,6 +681,12 @@ class SignalSchema:
|
|
|
679
681
|
# adding new signal with function
|
|
680
682
|
new_values[name] = value.get_result_type(self)
|
|
681
683
|
continue
|
|
684
|
+
if isinstance(value, primitives):
|
|
685
|
+
# For primitives, store the type, not the value
|
|
686
|
+
val = literal(value)
|
|
687
|
+
val.type = python_to_sql(type(value))()
|
|
688
|
+
new_values[name] = sql_to_python(val)
|
|
689
|
+
continue
|
|
682
690
|
if isinstance(value, ColumnElement):
|
|
683
691
|
# adding new signal
|
|
684
692
|
new_values[name] = sql_to_python(value)
|
datachain/query/dataset.py
CHANGED
|
@@ -1099,13 +1099,9 @@ class DatasetQuery:
|
|
|
1099
1099
|
namespace_name: Optional[str] = None,
|
|
1100
1100
|
catalog: Optional["Catalog"] = None,
|
|
1101
1101
|
session: Optional[Session] = None,
|
|
1102
|
-
indexing_column_types: Optional[dict[str, Any]] = None,
|
|
1103
1102
|
in_memory: bool = False,
|
|
1104
|
-
fallback_to_studio: bool = True,
|
|
1105
1103
|
update: bool = False,
|
|
1106
1104
|
) -> None:
|
|
1107
|
-
from datachain.remote.studio import is_token_set
|
|
1108
|
-
|
|
1109
1105
|
self.session = Session.get(session, catalog=catalog, in_memory=in_memory)
|
|
1110
1106
|
self.catalog = catalog or self.session.catalog
|
|
1111
1107
|
self.steps: list[Step] = []
|
|
@@ -1137,18 +1133,16 @@ class DatasetQuery:
|
|
|
1137
1133
|
# not setting query step yet as listing dataset might not exist at
|
|
1138
1134
|
# this point
|
|
1139
1135
|
self.list_ds_name = name
|
|
1140
|
-
|
|
1136
|
+
else:
|
|
1141
1137
|
self._set_starting_step(
|
|
1142
1138
|
self.catalog.get_dataset_with_remote_fallback(
|
|
1143
1139
|
name,
|
|
1144
1140
|
namespace_name=namespace_name,
|
|
1145
1141
|
project_name=project_name,
|
|
1146
1142
|
version=version,
|
|
1143
|
+
pull_dataset=True,
|
|
1147
1144
|
)
|
|
1148
1145
|
)
|
|
1149
|
-
else:
|
|
1150
|
-
project = self.catalog.metastore.get_project(project_name, namespace_name)
|
|
1151
|
-
self._set_starting_step(self.catalog.get_dataset(name, project=project))
|
|
1152
1146
|
|
|
1153
1147
|
def _set_starting_step(self, ds: "DatasetRecord") -> None:
|
|
1154
1148
|
if not self.version:
|
datachain/remote/studio.py
CHANGED
|
@@ -78,10 +78,11 @@ def _parse_dates(obj: dict, date_fields: list[str]):
|
|
|
78
78
|
|
|
79
79
|
|
|
80
80
|
class Response(Generic[T]):
|
|
81
|
-
def __init__(self, data: T, ok: bool, message: str) -> None:
|
|
81
|
+
def __init__(self, data: T, ok: bool, message: str, status: int) -> None:
|
|
82
82
|
self.data = data
|
|
83
83
|
self.ok = ok
|
|
84
84
|
self.message = message
|
|
85
|
+
self.status = status
|
|
85
86
|
|
|
86
87
|
def __repr__(self):
|
|
87
88
|
return (
|
|
@@ -186,7 +187,7 @@ class StudioClient:
|
|
|
186
187
|
message = "Indexing in progress"
|
|
187
188
|
else:
|
|
188
189
|
message = content.get("message", "")
|
|
189
|
-
return Response(response_data, ok, message)
|
|
190
|
+
return Response(response_data, ok, message, response.status_code)
|
|
190
191
|
|
|
191
192
|
@retry_with_backoff(retries=3, errors=(HTTPError, Timeout))
|
|
192
193
|
def _send_request(
|
|
@@ -236,7 +237,7 @@ class StudioClient:
|
|
|
236
237
|
else:
|
|
237
238
|
message = ""
|
|
238
239
|
|
|
239
|
-
return Response(data, ok, message)
|
|
240
|
+
return Response(data, ok, message, response.status_code)
|
|
240
241
|
|
|
241
242
|
@staticmethod
|
|
242
243
|
def _unpacker_hook(code, data):
|
|
{datachain-0.22.0.dist-info → datachain-0.24.0.dist-info}/RECORD
CHANGED
|
@@ -3,7 +3,7 @@ datachain/__main__.py,sha256=hG3Y4ARGEqe1AWwNMd259rBlqtphx1Wk39YbueQ0yV8,91
|
|
|
3
3
|
datachain/asyn.py,sha256=RH_jFwJcTXxhEFomaI9yL6S3Onau6NZ6FSKfKFGtrJE,9689
|
|
4
4
|
datachain/cache.py,sha256=ESVRaCJXEThMIfGEFVHx6wJPOZA7FYk9V6WxjyuqUBY,3626
|
|
5
5
|
datachain/config.py,sha256=g8qbNV0vW2VEKpX-dGZ9pAn0DAz6G2ZFcr7SAV3PoSM,4272
|
|
6
|
-
datachain/dataset.py,sha256
|
|
6
|
+
datachain/dataset.py,sha256=wDrukmkDnYP0X8bAGY-7O1NDE3DWCFqrH8VVDpXM9Ok,25263
|
|
7
7
|
datachain/delta.py,sha256=4RqLLc9dJLF8x9GG9IDgi86DwuPerZQ4HAUnNBeACw8,8446
|
|
8
8
|
datachain/error.py,sha256=OWwWMkzZYJrkcoEDGhJHMf7SfKvxcsOLRF94mjPf29I,1609
|
|
9
9
|
datachain/job.py,sha256=x5PB6d5sqx00hePNNkirESlOVAvnmkEM5ygUgQmAhsk,1262
|
|
@@ -21,13 +21,13 @@ datachain/studio.py,sha256=bLok-eJNFRHQScEyAyA_Fas52dmijd5r-73KudWxV4k,13337
|
|
|
21
21
|
datachain/telemetry.py,sha256=0A4IOPPp9VlP5pyW9eBfaTK3YhHGzHl7dQudQjUAx9A,994
|
|
22
22
|
datachain/utils.py,sha256=DNqOi-Ydb7InyWvD9m7_yailxz6-YGpZzh00biQaHNo,15305
|
|
23
23
|
datachain/catalog/__init__.py,sha256=cMZzSz3VoUi-6qXSVaHYN-agxQuAcz2XSqnEPZ55crE,353
|
|
24
|
-
datachain/catalog/catalog.py,sha256=
|
|
24
|
+
datachain/catalog/catalog.py,sha256=z4GbRMHeW0YA20Sjh7QuPy1Rj4RkX547WN9Pp5wAD6o,65277
|
|
25
25
|
datachain/catalog/datasource.py,sha256=IkGMh0Ttg6Q-9DWfU_H05WUnZepbGa28HYleECi6K7I,1353
|
|
26
26
|
datachain/catalog/loader.py,sha256=UXjYD6BNRoupPvkiz3-b04jepXhtLHCA4gzKFnXxOtQ,5987
|
|
27
27
|
datachain/cli/__init__.py,sha256=WvBqnwjG8Wp9xGCn-4eqfoZ3n7Sj1HJemCi4MayJh_c,8221
|
|
28
28
|
datachain/cli/utils.py,sha256=wrLnAh7Wx8O_ojZE8AE4Lxn5WoxHbOj7as8NWlLAA74,3036
|
|
29
29
|
datachain/cli/commands/__init__.py,sha256=zp3bYIioO60x_X04A4-IpZqSYVnpwOa1AdERQaRlIhI,493
|
|
30
|
-
datachain/cli/commands/datasets.py,sha256=
|
|
30
|
+
datachain/cli/commands/datasets.py,sha256=LfOe22O9OCEDI8K2cy05Gp4_Q-GFHOHRv4bXQ-USM4s,6472
|
|
31
31
|
datachain/cli/commands/du.py,sha256=9edEzDEs98K2VYk8Wf-ZMpUzALcgm9uD6YtoqbvtUGU,391
|
|
32
32
|
datachain/cli/commands/index.py,sha256=eglNaIe1yyIadUHHumjtNbgIjht6kme7SS7xE3YHR88,198
|
|
33
33
|
datachain/cli/commands/ls.py,sha256=CBmk838Q-EQp04lE2Qdnpsc1GXAkC4-I-b-a_828n1E,5272
|
|
@@ -49,10 +49,10 @@ datachain/client/s3.py,sha256=6DNVGLg-woPS1DVlYVX2rIlunNblsuxyOnI1rSzhW3k,7515
|
|
|
49
49
|
datachain/data_storage/__init__.py,sha256=9Wit-oe5P46V7CJQTD0BJ5MhOa2Y9h3ddJ4VWTe-Lec,273
|
|
50
50
|
datachain/data_storage/db_engine.py,sha256=n8ojCbvVMPY2e3SG8fUaaD0b9GkVfpl_Naa_6EiHfWg,3788
|
|
51
51
|
datachain/data_storage/job.py,sha256=9r0OGwh22bHNIvLHqg8_-eJSP1YYB-BN5HOla5TdCxw,402
|
|
52
|
-
datachain/data_storage/metastore.py,sha256=
|
|
52
|
+
datachain/data_storage/metastore.py,sha256=9mWYOKK3AoHeKPGFm-WBfPrmnYHhwYeXx5MOueKTe7I,52657
|
|
53
53
|
datachain/data_storage/schema.py,sha256=o3JbURKXRg3IJyIVA4QjHHkn6byRuz7avbydU2FlvNY,9897
|
|
54
54
|
datachain/data_storage/serializer.py,sha256=6G2YtOFqqDzJf1KbvZraKGXl2XHZyVml2krunWUum5o,927
|
|
55
|
-
datachain/data_storage/sqlite.py,sha256=
|
|
55
|
+
datachain/data_storage/sqlite.py,sha256=tT_soVi6l_pFSKaDktA1t4qW_vmPvXnvYSf4TZTKZYk,30067
|
|
56
56
|
datachain/data_storage/warehouse.py,sha256=_7btARw-kd-Nx19S0qW6JqdF3VYyypQXFzsXq68SWKI,32327
|
|
57
57
|
datachain/diff/__init__.py,sha256=-OFZzgOplqO84iWgGY7kfe60NXaWR9JRIh9T-uJboAM,9668
|
|
58
58
|
datachain/fs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -82,10 +82,10 @@ datachain/lib/listing_info.py,sha256=9ua40Hw0aiQByUw3oAEeNzMavJYfW0Uhe8YdCTK-m_g
|
|
|
82
82
|
datachain/lib/meta_formats.py,sha256=zdyg6XLk3QIsSk3I7s0Ez5kaCJSlE3uq7JiGxf7UwtU,6348
|
|
83
83
|
datachain/lib/model_store.py,sha256=DNIv8Y6Jtk1_idNLzIpsThOsdW2BMAudyUCbPUcgcxk,2515
|
|
84
84
|
datachain/lib/namespaces.py,sha256=it52UbbwB8dzhesO2pMs_nThXiPQ1Ph9sD9I3GQkg5s,2099
|
|
85
|
-
datachain/lib/projects.py,sha256=
|
|
85
|
+
datachain/lib/projects.py,sha256=8lN0qV8czX1LGtWURCUvRlSJk-RpO9w9Rra_pOZus6g,2595
|
|
86
86
|
datachain/lib/pytorch.py,sha256=oBBd6cxYrcwaFz7IQajKqhGqDdNnwUZWs0wJPRizrjk,7712
|
|
87
87
|
datachain/lib/settings.py,sha256=9wi0FoHxRxNiyn99pR28IYsMkoo47jQxeXuObQr2Ar0,2929
|
|
88
|
-
datachain/lib/signal_schema.py,sha256=
|
|
88
|
+
datachain/lib/signal_schema.py,sha256=dVEqqrQQ_BS3yzU_49-Gari7IjVyMl1UT8h1WIsZabs,36489
|
|
89
89
|
datachain/lib/tar.py,sha256=MLcVjzIgBqRuJacCNpZ6kwSZNq1i2tLyROc8PVprHsA,999
|
|
90
90
|
datachain/lib/text.py,sha256=UNHm8fhidk7wdrWqacEWaA6I9ykfYqarQ2URby7jc7M,1261
|
|
91
91
|
datachain/lib/udf.py,sha256=3uITkhO8IZnX49aePheObzd5ORYi2DIDYZVMQlBAJ-s,16687
|
|
@@ -103,14 +103,14 @@ datachain/lib/convert/values_to_tuples.py,sha256=j5yZMrVUH6W7b-7yUvdCTGI7JCUAYUO
|
|
|
103
103
|
datachain/lib/dc/__init__.py,sha256=HD0NYrdy44u6kkpvgGjJcvGz-UGTHui2azghcT8ZUg0,838
|
|
104
104
|
datachain/lib/dc/csv.py,sha256=q6a9BpapGwP6nwy6c5cklxQumep2fUp9l2LAjtTJr6s,4411
|
|
105
105
|
datachain/lib/dc/database.py,sha256=g5M6NjYR1T0vKte-abV-3Ejnm-HqxTIMir5cRi_SziE,6051
|
|
106
|
-
datachain/lib/dc/datachain.py,sha256=
|
|
107
|
-
datachain/lib/dc/datasets.py,sha256=
|
|
106
|
+
datachain/lib/dc/datachain.py,sha256=dFI7JX5-41HLgA-TUR99dtR1lvk2vokaMC3mbIW1XT4,85814
|
|
107
|
+
datachain/lib/dc/datasets.py,sha256=U4xqAfs6FdW8HIJjeayQaIg1dunaIsVXYGqfq_sDSv0,13274
|
|
108
108
|
datachain/lib/dc/hf.py,sha256=PJl2wiLjdRsMz0SYbLT-6H8b-D5i2WjeH7li8HHOk_0,2145
|
|
109
109
|
datachain/lib/dc/json.py,sha256=dNijfJ-H92vU3soyR7X1IiDrWhm6yZIGG3bSnZkPdAE,2733
|
|
110
|
-
datachain/lib/dc/listings.py,sha256=
|
|
110
|
+
datachain/lib/dc/listings.py,sha256=V379Cb-7ZyquM0w7sWArQZkzInZy4GB7QQ1ZfowKzQY,4544
|
|
111
111
|
datachain/lib/dc/pandas.py,sha256=ObueUXDUFKJGu380GmazdG02ARpKAHPhSaymfmOH13E,1489
|
|
112
112
|
datachain/lib/dc/parquet.py,sha256=zYcSgrWwyEDW9UxGUSVdIVsCu15IGEf0xL8KfWQqK94,1782
|
|
113
|
-
datachain/lib/dc/records.py,sha256=
|
|
113
|
+
datachain/lib/dc/records.py,sha256=FpPbApWopUri1gIaSMsfXN4fevja4mjmfb6Q5eiaGxI,3116
|
|
114
114
|
datachain/lib/dc/storage.py,sha256=8xiV3c6k-sG14RGwNJCp0AbV6L0mNDsTVZ-Est-ccnw,7672
|
|
115
115
|
datachain/lib/dc/utils.py,sha256=VawOAlJSvAtZbsMg33s5tJe21TRx1Km3QggI1nN6tnw,3984
|
|
116
116
|
datachain/lib/dc/values.py,sha256=7l1n352xWrEdql2NhBcZ3hj8xyPglWiY4qHjFPjn6iw,1428
|
|
@@ -125,7 +125,7 @@ datachain/model/ultralytics/pose.py,sha256=pBlmt63Qe68FKmexHimUGlNbNOoOlMHXG4fzX
|
|
|
125
125
|
datachain/model/ultralytics/segment.py,sha256=63bDCj43E6iZ0hFI5J6uQfksdCmjEp6sEm1XzVaE8pw,2986
|
|
126
126
|
datachain/query/__init__.py,sha256=7DhEIjAA8uZJfejruAVMZVcGFmvUpffuZJwgRqNwe-c,263
|
|
127
127
|
datachain/query/batch.py,sha256=-goxLpE0EUvaDHu66rstj53UnfHpYfBUGux8GSpJ93k,4306
|
|
128
|
-
datachain/query/dataset.py,sha256=
|
|
128
|
+
datachain/query/dataset.py,sha256=C60VM0pScsrWcMqLNdX-tU0HE1SnEE9lRN3TU8CfTu4,61223
|
|
129
129
|
datachain/query/dispatch.py,sha256=A0nPxn6mEN5d9dDo6S8m16Ji_9IvJLXrgF2kqXdi4fs,15546
|
|
130
130
|
datachain/query/metrics.py,sha256=DOK5HdNVaRugYPjl8qnBONvTkwjMloLqAr7Mi3TjCO0,858
|
|
131
131
|
datachain/query/params.py,sha256=O_j89mjYRLOwWNhYZl-z7mi-rkdP7WyFmaDufsdTryE,863
|
|
@@ -135,7 +135,7 @@ datachain/query/session.py,sha256=gKblltJAVQAVSTswAgWGDgGbpmFlFzFVkIQojDCjgXM,68
|
|
|
135
135
|
datachain/query/udf.py,sha256=e753bDJzTNjGFQn1WGTvOAWSwjDbrFI1-_DDWkWN2ls,1343
|
|
136
136
|
datachain/query/utils.py,sha256=HaSDNH_XGvp_NIcXjcB7j4vJRPi4_tbztDWclYelHY4,1208
|
|
137
137
|
datachain/remote/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
138
|
-
datachain/remote/studio.py,sha256=
|
|
138
|
+
datachain/remote/studio.py,sha256=oJp2KD9eO8zQDnPfNpAALZYsOlBfqVKKRTeCkEpcsYk,15196
|
|
139
139
|
datachain/sql/__init__.py,sha256=6SQRdbljO3d2hx3EAVXEZrHQKv5jth0Jh98PogT59No,262
|
|
140
140
|
datachain/sql/selectable.py,sha256=cTc60qVoAwqqss0Vop8Lt5Z-ROnM1XrQmL_GLjRxhXs,1765
|
|
141
141
|
datachain/sql/types.py,sha256=ASSPkmM5EzdRindqj2O7WHLXq8VHAgFYedG8lYfGvVI,14045
|
|
@@ -157,9 +157,9 @@ datachain/sql/sqlite/vector.py,sha256=ncW4eu2FlJhrP_CIpsvtkUabZlQdl2D5Lgwy_cbfqR
|
|
|
157
157
|
datachain/toolkit/__init__.py,sha256=eQ58Q5Yf_Fgv1ZG0IO5dpB4jmP90rk8YxUWmPc1M2Bo,68
|
|
158
158
|
datachain/toolkit/split.py,sha256=ktGWzY4kyzjWyR86dhvzw-Zhl0lVk_LOX3NciTac6qo,2914
|
|
159
159
|
datachain/torch/__init__.py,sha256=gIS74PoEPy4TB3X6vx9nLO0Y3sLJzsA8ckn8pRWihJM,579
|
|
160
|
-
datachain-0.
|
|
161
|
-
datachain-0.
|
|
162
|
-
datachain-0.
|
|
163
|
-
datachain-0.
|
|
164
|
-
datachain-0.
|
|
165
|
-
datachain-0.
|
|
160
|
+
datachain-0.24.0.dist-info/licenses/LICENSE,sha256=8DnqK5yoPI_E50bEg_zsHKZHY2HqPy4rYN338BHQaRA,11344
|
|
161
|
+
datachain-0.24.0.dist-info/METADATA,sha256=QWSVON3r5d5d18gRMs9G5DNV4z-kBBY47dMYUEFR0b0,13281
|
|
162
|
+
datachain-0.24.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
163
|
+
datachain-0.24.0.dist-info/entry_points.txt,sha256=0GMJS6B_KWq0m3VT98vQI2YZodAMkn4uReZ_okga9R4,49
|
|
164
|
+
datachain-0.24.0.dist-info/top_level.txt,sha256=lZPpdU_2jJABLNIg2kvEOBi8PtsYikbN1OdMLHk8bTg,10
|
|
165
|
+
datachain-0.24.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|