datachain 0.19.1__py3-none-any.whl → 0.20.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datachain might be problematic. Click here for more details.
- datachain/__init__.py +3 -0
- datachain/catalog/catalog.py +180 -65
- datachain/cli/__init__.py +0 -7
- datachain/cli/commands/datasets.py +43 -28
- datachain/cli/parser/__init__.py +1 -35
- datachain/cli/parser/job.py +25 -0
- datachain/cli/parser/studio.py +11 -4
- datachain/data_storage/metastore.py +390 -37
- datachain/data_storage/schema.py +23 -1
- datachain/data_storage/sqlite.py +139 -7
- datachain/data_storage/warehouse.py +26 -7
- datachain/dataset.py +125 -12
- datachain/delta.py +9 -5
- datachain/error.py +36 -0
- datachain/lib/dataset_info.py +4 -0
- datachain/lib/dc/datachain.py +86 -7
- datachain/lib/dc/datasets.py +62 -12
- datachain/lib/dc/listings.py +111 -0
- datachain/lib/dc/records.py +1 -0
- datachain/lib/dc/storage.py +14 -2
- datachain/lib/listing.py +3 -1
- datachain/lib/namespaces.py +73 -0
- datachain/lib/projects.py +86 -0
- datachain/lib/settings.py +10 -0
- datachain/listing.py +3 -1
- datachain/namespace.py +65 -0
- datachain/project.py +78 -0
- datachain/query/dataset.py +71 -46
- datachain/query/session.py +1 -1
- datachain/remote/studio.py +67 -26
- datachain/studio.py +68 -8
- {datachain-0.19.1.dist-info → datachain-0.20.0.dist-info}/METADATA +2 -2
- {datachain-0.19.1.dist-info → datachain-0.20.0.dist-info}/RECORD +37 -33
- {datachain-0.19.1.dist-info → datachain-0.20.0.dist-info}/WHEEL +0 -0
- {datachain-0.19.1.dist-info → datachain-0.20.0.dist-info}/entry_points.txt +0 -0
- {datachain-0.19.1.dist-info → datachain-0.20.0.dist-info}/licenses/LICENSE +0 -0
- {datachain-0.19.1.dist-info → datachain-0.20.0.dist-info}/top_level.txt +0 -0
|
@@ -8,7 +8,8 @@ if TYPE_CHECKING:
|
|
|
8
8
|
|
|
9
9
|
from datachain.cli.utils import determine_flavors
|
|
10
10
|
from datachain.config import Config
|
|
11
|
-
from datachain.
|
|
11
|
+
from datachain.dataset import parse_dataset_name
|
|
12
|
+
from datachain.error import DataChainError, DatasetNotFoundError
|
|
12
13
|
from datachain.studio import list_datasets as list_datasets_studio
|
|
13
14
|
|
|
14
15
|
|
|
@@ -101,11 +102,15 @@ def list_datasets_local(catalog: "Catalog", name: Optional[str] = None):
|
|
|
101
102
|
|
|
102
103
|
for d in catalog.ls_datasets():
|
|
103
104
|
for v in d.versions:
|
|
104
|
-
yield (d.
|
|
105
|
+
yield (d.full_name, v.version)
|
|
105
106
|
|
|
106
107
|
|
|
107
108
|
def list_datasets_local_versions(catalog: "Catalog", name: str):
|
|
108
|
-
|
|
109
|
+
namespace_name, project_name, name = parse_dataset_name(name)
|
|
110
|
+
namespace_name = namespace_name or catalog.metastore.default_namespace_name
|
|
111
|
+
project_name = project_name or catalog.metastore.default_project_name
|
|
112
|
+
project = catalog.metastore.get_project(project_name, namespace_name)
|
|
113
|
+
ds = catalog.get_dataset(name, project)
|
|
109
114
|
for v in ds.versions:
|
|
110
115
|
yield (name, v.version)
|
|
111
116
|
|
|
@@ -129,25 +134,29 @@ def rm_dataset(
|
|
|
129
134
|
name: str,
|
|
130
135
|
version: Optional[str] = None,
|
|
131
136
|
force: Optional[bool] = False,
|
|
132
|
-
studio: bool = False,
|
|
133
|
-
local: bool = False,
|
|
134
|
-
all: bool = True,
|
|
137
|
+
studio: Optional[bool] = False,
|
|
135
138
|
team: Optional[str] = None,
|
|
136
139
|
):
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
140
|
+
namespace_name, project_name, name = parse_dataset_name(name)
|
|
141
|
+
namespace_name = namespace_name or catalog.metastore.default_namespace_name
|
|
142
|
+
project_name = project_name or catalog.metastore.default_project_name
|
|
143
|
+
|
|
144
|
+
if not catalog.metastore.is_local_dataset(namespace_name) and studio:
|
|
145
|
+
from datachain.studio import remove_studio_dataset
|
|
146
|
+
|
|
147
|
+
token = Config().read().get("studio", {}).get("token")
|
|
148
|
+
if not token:
|
|
149
|
+
raise DataChainError(
|
|
150
|
+
"Not logged in to Studio. Log in with 'datachain auth login'."
|
|
151
|
+
)
|
|
152
|
+
remove_studio_dataset(team, name, namespace_name, project_name, version, force)
|
|
153
|
+
else:
|
|
143
154
|
try:
|
|
144
|
-
catalog.
|
|
155
|
+
project = catalog.metastore.get_project(project_name, namespace_name)
|
|
156
|
+
catalog.remove_dataset(name, project, version=version, force=force)
|
|
145
157
|
except DatasetNotFoundError:
|
|
146
158
|
print("Dataset not found in local", file=sys.stderr)
|
|
147
159
|
|
|
148
|
-
if (all or studio) and token:
|
|
149
|
-
remove_studio_dataset(team, name, version, force)
|
|
150
|
-
|
|
151
160
|
|
|
152
161
|
def edit_dataset(
|
|
153
162
|
catalog: "Catalog",
|
|
@@ -155,21 +164,27 @@ def edit_dataset(
|
|
|
155
164
|
new_name: Optional[str] = None,
|
|
156
165
|
description: Optional[str] = None,
|
|
157
166
|
attrs: Optional[list[str]] = None,
|
|
158
|
-
studio: bool = False,
|
|
159
|
-
local: bool = False,
|
|
160
|
-
all: bool = True,
|
|
161
167
|
team: Optional[str] = None,
|
|
162
168
|
):
|
|
163
|
-
|
|
169
|
+
namespace_name, project_name, name = parse_dataset_name(name)
|
|
170
|
+
namespace_name = namespace_name or catalog.metastore.default_namespace_name
|
|
171
|
+
project_name = project_name or catalog.metastore.default_project_name
|
|
164
172
|
|
|
165
|
-
|
|
166
|
-
all, local, studio = determine_flavors(studio, local, all, token)
|
|
167
|
-
|
|
168
|
-
if all or local:
|
|
173
|
+
if catalog.metastore.is_local_dataset(namespace_name):
|
|
169
174
|
try:
|
|
170
|
-
catalog.edit_dataset(
|
|
175
|
+
catalog.edit_dataset(
|
|
176
|
+
name, catalog.metastore.default_project, new_name, description, attrs
|
|
177
|
+
)
|
|
171
178
|
except DatasetNotFoundError:
|
|
172
179
|
print("Dataset not found in local", file=sys.stderr)
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
180
|
+
else:
|
|
181
|
+
from datachain.studio import edit_studio_dataset
|
|
182
|
+
|
|
183
|
+
token = Config().read().get("studio", {}).get("token")
|
|
184
|
+
if not token:
|
|
185
|
+
raise DataChainError(
|
|
186
|
+
"Not logged in to Studio. Log in with 'datachain auth login'."
|
|
187
|
+
)
|
|
188
|
+
edit_studio_dataset(
|
|
189
|
+
team, name, namespace_name, project_name, new_name, description, attrs
|
|
190
|
+
)
|
datachain/cli/parser/__init__.py
CHANGED
|
@@ -221,26 +221,6 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
|
|
|
221
221
|
nargs="+",
|
|
222
222
|
help="Dataset attributes",
|
|
223
223
|
)
|
|
224
|
-
parse_edit_dataset.add_argument(
|
|
225
|
-
"--studio",
|
|
226
|
-
action="store_true",
|
|
227
|
-
default=False,
|
|
228
|
-
help="Edit dataset from Studio",
|
|
229
|
-
)
|
|
230
|
-
parse_edit_dataset.add_argument(
|
|
231
|
-
"-L",
|
|
232
|
-
"--local",
|
|
233
|
-
action="store_true",
|
|
234
|
-
default=False,
|
|
235
|
-
help="Edit local dataset only",
|
|
236
|
-
)
|
|
237
|
-
parse_edit_dataset.add_argument(
|
|
238
|
-
"-a",
|
|
239
|
-
"--all",
|
|
240
|
-
action="store_true",
|
|
241
|
-
default=True,
|
|
242
|
-
help="Edit both datasets from studio and local",
|
|
243
|
-
)
|
|
244
224
|
parse_edit_dataset.add_argument(
|
|
245
225
|
"--team",
|
|
246
226
|
action="store",
|
|
@@ -315,21 +295,7 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
|
|
|
315
295
|
"--studio",
|
|
316
296
|
action="store_true",
|
|
317
297
|
default=False,
|
|
318
|
-
help="Remove dataset from Studio",
|
|
319
|
-
)
|
|
320
|
-
rm_dataset_parser.add_argument(
|
|
321
|
-
"-L",
|
|
322
|
-
"--local",
|
|
323
|
-
action="store_true",
|
|
324
|
-
default=False,
|
|
325
|
-
help="Remove local datasets only",
|
|
326
|
-
)
|
|
327
|
-
rm_dataset_parser.add_argument(
|
|
328
|
-
"-a",
|
|
329
|
-
"--all",
|
|
330
|
-
action="store_true",
|
|
331
|
-
default=True,
|
|
332
|
-
help="Remove both local and studio",
|
|
298
|
+
help="Remove dataset from Studio only",
|
|
333
299
|
)
|
|
334
300
|
rm_dataset_parser.add_argument(
|
|
335
301
|
"--team",
|
datachain/cli/parser/job.py
CHANGED
|
@@ -51,6 +51,13 @@ def add_jobs_parser(subparsers, parent_parser) -> None:
|
|
|
51
51
|
help="Environment variables in KEY=VALUE format",
|
|
52
52
|
)
|
|
53
53
|
|
|
54
|
+
studio_run_parser.add_argument(
|
|
55
|
+
"--cluster-id",
|
|
56
|
+
type=int,
|
|
57
|
+
action="store",
|
|
58
|
+
help="Compute cluster ID to run the job on",
|
|
59
|
+
)
|
|
60
|
+
|
|
54
61
|
studio_run_parser.add_argument(
|
|
55
62
|
"--workers",
|
|
56
63
|
type=int,
|
|
@@ -165,3 +172,21 @@ def add_jobs_parser(subparsers, parent_parser) -> None:
|
|
|
165
172
|
default=None,
|
|
166
173
|
help="Team to check logs for (default: from config)",
|
|
167
174
|
)
|
|
175
|
+
|
|
176
|
+
studio_clusters_help = "List compute clusters in Studio"
|
|
177
|
+
studio_clusters_description = "List compute clusters in Studio."
|
|
178
|
+
|
|
179
|
+
studio_clusters_parser = jobs_subparser.add_parser(
|
|
180
|
+
"clusters",
|
|
181
|
+
parents=[parent_parser],
|
|
182
|
+
description=studio_clusters_description,
|
|
183
|
+
help=studio_clusters_help,
|
|
184
|
+
formatter_class=CustomHelpFormatter,
|
|
185
|
+
)
|
|
186
|
+
|
|
187
|
+
studio_clusters_parser.add_argument(
|
|
188
|
+
"--team",
|
|
189
|
+
action="store",
|
|
190
|
+
default=None,
|
|
191
|
+
help="Team to list clusters for (default: from config)",
|
|
192
|
+
)
|
datachain/cli/parser/studio.py
CHANGED
|
@@ -89,8 +89,13 @@ def add_auth_parser(subparsers, parent_parser) -> None:
|
|
|
89
89
|
help="Remove the token from the local project config",
|
|
90
90
|
)
|
|
91
91
|
|
|
92
|
-
auth_team_help = "Set default team for Studio operations"
|
|
93
|
-
auth_team_description =
|
|
92
|
+
auth_team_help = "Set or show default team for Studio operations"
|
|
93
|
+
auth_team_description = (
|
|
94
|
+
"Set or show the default team for Studio operations. "
|
|
95
|
+
"This will be used globally by default. "
|
|
96
|
+
"Use --local to set the team locally for the current project. "
|
|
97
|
+
"If no team name is provided, the default team will be shown."
|
|
98
|
+
)
|
|
94
99
|
|
|
95
100
|
team_parser = auth_subparser.add_parser(
|
|
96
101
|
"team",
|
|
@@ -102,13 +107,15 @@ def add_auth_parser(subparsers, parent_parser) -> None:
|
|
|
102
107
|
team_parser.add_argument(
|
|
103
108
|
"team_name",
|
|
104
109
|
action="store",
|
|
110
|
+
default=None,
|
|
111
|
+
nargs="?",
|
|
105
112
|
help="Name of the team to set as default",
|
|
106
113
|
)
|
|
107
114
|
team_parser.add_argument(
|
|
108
|
-
"--
|
|
115
|
+
"--local",
|
|
109
116
|
action="store_true",
|
|
110
117
|
default=False,
|
|
111
|
-
help="Set team
|
|
118
|
+
help="Set team locally for the current project",
|
|
112
119
|
)
|
|
113
120
|
|
|
114
121
|
auth_token_help = "View Studio authentication token" # noqa: S105
|