datachain 0.8.8__py3-none-any.whl → 0.8.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- datachain/cli/__init__.py +14 -7
- datachain/cli/commands/datasets.py +2 -3
- datachain/cli/parser/__init__.py +69 -82
- datachain/cli/parser/job.py +20 -25
- datachain/cli/parser/studio.py +41 -65
- datachain/cli/parser/utils.py +1 -1
- datachain/cli/utils.py +1 -1
- datachain/client/local.py +1 -1
- datachain/data_storage/sqlite.py +38 -7
- datachain/data_storage/warehouse.py +2 -2
- datachain/lib/arrow.py +1 -1
- datachain/lib/convert/python_to_sql.py +15 -3
- datachain/lib/convert/unflatten.py +1 -2
- datachain/lib/dc.py +26 -5
- datachain/lib/file.py +27 -4
- datachain/lib/listing.py +4 -4
- datachain/lib/pytorch.py +3 -1
- datachain/lib/udf.py +56 -20
- datachain/model/bbox.py +9 -9
- datachain/model/pose.py +9 -9
- datachain/model/segment.py +6 -6
- datachain/progress.py +0 -13
- datachain/query/dataset.py +20 -14
- datachain/remote/studio.py +2 -2
- datachain/sql/sqlite/base.py +35 -14
- datachain/studio.py +22 -16
- {datachain-0.8.8.dist-info → datachain-0.8.10.dist-info}/METADATA +4 -3
- {datachain-0.8.8.dist-info → datachain-0.8.10.dist-info}/RECORD +32 -32
- {datachain-0.8.8.dist-info → datachain-0.8.10.dist-info}/LICENSE +0 -0
- {datachain-0.8.8.dist-info → datachain-0.8.10.dist-info}/WHEEL +0 -0
- {datachain-0.8.8.dist-info → datachain-0.8.10.dist-info}/entry_points.txt +0 -0
- {datachain-0.8.8.dist-info → datachain-0.8.10.dist-info}/top_level.txt +0 -0
datachain/cli/__init__.py
CHANGED
@@ -39,12 +39,15 @@ def main(argv: Optional[list[str]] = None) -> int:
     if args.command in ("internal-run-udf", "internal-run-udf-worker"):
         return handle_udf(args.command)
 
+    if args.command is None:
+        datachain_parser.print_help(sys.stderr)
+        return 1
+
     logger.addHandler(logging.StreamHandler())
     logging_level = get_logging_level(args)
     logger.setLevel(logging_level)
 
     client_config = {
-        "aws_endpoint_url": args.aws_endpoint_url,
         "anon": args.anon,
     }
 
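The subparser setup in datachain/cli/parser/__init__.py below drops required=True, so a bare `datachain` invocation now reaches this new `args.command is None` branch instead of failing inside argparse; `handle_dataset_command` gains the same guard for a bare `datachain dataset`. A minimal standalone sketch of the pattern (the parser and names here are illustrative, not datachain's internals):

import sys
from argparse import ArgumentParser

parser = ArgumentParser(prog="demo")
subp = parser.add_subparsers(dest="command")  # note: no required=True
subp.add_parser("ls")

args = parser.parse_args([])       # simulates running `demo` with no arguments
if args.command is None:           # argparse leaves the dest as None
    parser.print_help(sys.stderr)  # help goes to stderr, not stdout
    raise SystemExit(1)            # and the process exits non-zero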
@@ -69,7 +72,7 @@ def main(argv: Optional[list[str]] = None) -> int:
 
 def handle_command(args, catalog, client_config) -> int:
     """Handle the different CLI commands."""
-    from datachain.studio import
+    from datachain.studio import process_auth_cli_args, process_jobs_args
 
     command_handlers = {
         "cp": lambda: handle_cp_command(args, catalog),
@@ -85,7 +88,7 @@ def handle_command(args, catalog, client_config) -> int:
         "query": lambda: handle_query_command(args, catalog),
         "clear-cache": lambda: clear_cache(catalog),
         "gc": lambda: garbage_collect(catalog),
-        "
+        "auth": lambda: process_auth_cli_args(args),
         "job": lambda: process_jobs_args(args),
     }
 
@@ -120,12 +123,17 @@ def handle_clone_command(args, catalog):
         recursive=bool(args.recursive),
         no_glob=args.no_glob,
         no_cp=args.no_cp,
-        edatachain=args.edatachain,
-        edatachain_file=args.edatachain_file,
     )
 
 
 def handle_dataset_command(args, catalog):
+    if args.datasets_cmd is None:
+        print(
+            f"Use 'datachain {args.command} --help' to see available options",
+            file=sys.stderr,
+        )
+        return 1
+
     dataset_commands = {
         "pull": lambda: catalog.pull_dataset(
             args.dataset,
@@ -134,8 +142,6 @@ def handle_dataset_command(args, catalog):
             local_ds_version=args.local_version,
             cp=args.cp,
             force=bool(args.force),
-            edatachain=args.edatachain,
-            edatachain_file=args.edatachain_file,
         ),
         "edit": lambda: edit_dataset(
             catalog,
@@ -187,6 +193,7 @@ def handle_dataset_command(args, catalog):
     handler = dataset_commands.get(args.datasets_cmd)
     if handler:
         return handler()
+
     raise Exception(f"Unexpected command {args.datasets_cmd}")
 
 
datachain/cli/commands/datasets.py
CHANGED
@@ -11,6 +11,7 @@ if TYPE_CHECKING:
 from datachain.cli.utils import determine_flavors
 from datachain.config import Config
 from datachain.error import DatasetNotFoundError
+from datachain.studio import list_datasets as list_datasets_studio
 
 
 def list_datasets(
@@ -20,14 +21,12 @@ def list_datasets(
     all: bool = True,
     team: Optional[str] = None,
 ):
-    from datachain.studio import list_datasets
-
     token = Config().read().get("studio", {}).get("token")
     all, local, studio = determine_flavors(studio, local, all, token)
 
     local_datasets = set(list_datasets_local(catalog)) if all or local else set()
     studio_datasets = (
-        set(
+        set(list_datasets_studio(team=team)) if (all or studio) and token else set()
     )
 
     rows = [
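A hedged sketch of what the rewritten listing above computes, with made-up dataset names standing in for the results of list_datasets_local and list_datasets_studio; the real token comes from Config():

token = "fake-token"    # stand-in for Config().read().get("studio", {}).get("token")
all_, local, studio = True, False, False  # the real code reuses its `all` parameter

local_datasets = {"cats", "dogs"} if all_ or local else set()
studio_datasets = {"dogs", "birds"} if (all_ or studio) and token else set()

for name in sorted(local_datasets | studio_datasets):
    print(f"{name}: local={name in local_datasets}, studio={name in studio_datasets}")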
datachain/cli/parser/__init__.py
CHANGED
@@ -1,3 +1,4 @@
+import argparse
 from argparse import ArgumentParser
 from importlib.metadata import PackageNotFoundError, version
 
@@ -6,7 +7,7 @@ import shtab
 from datachain.cli.utils import BooleanOptionalAction, KeyValueArgs
 
 from .job import add_jobs_parser
-from .studio import
+from .studio import add_auth_parser
 from .utils import FIND_COLUMNS, add_show_args, add_sources_arg, find_columns_type
 
 
@@ -18,61 +19,64 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
     __version__ = "unknown"
 
     parser = ArgumentParser(
-        description="DataChain: Wrangle unstructured AI data at scale",
+        description="DataChain: Wrangle unstructured AI data at scale.",
+        prog="datachain",
     )
     parser.add_argument("-V", "--version", action="version", version=__version__)
 
     parent_parser = ArgumentParser(add_help=False)
     parent_parser.add_argument(
-        "--aws-endpoint-url",
-        type=str,
-        help="AWS endpoint URL",
-    )
-    parent_parser.add_argument(
-        "--anon",
-        action="store_true",
-        help="AWS anon (aka awscli's --no-sign-request)",
+        "-v", "--verbose", action="count", default=0, help="Be verbose"
     )
     parent_parser.add_argument(
-        "-
+        "-q", "--quiet", action="count", default=0, help="Be quiet"
     )
+
     parent_parser.add_argument(
-        "
+        "--anon",
+        action="store_true",
+        help="Use anonymous access to storage",
     )
     parent_parser.add_argument(
-        "-
+        "-u",
+        "--update",
+        action="count",
+        default=0,
+        help="Update cached list of files for the sources",
     )
+
     parent_parser.add_argument(
         "--debug-sql",
         action="store_true",
         default=False,
-        help=
+        help=argparse.SUPPRESS,
     )
     parent_parser.add_argument(
         "--pdb",
         action="store_true",
         default=False,
-        help=
+        help=argparse.SUPPRESS,
    )
 
     subp = parser.add_subparsers(
         title="Available Commands",
         metavar="command",
         dest="command",
-        help=f"Use `{parser.prog} command --help` for command-specific help
-        required=True,
+        help=f"Use `{parser.prog} command --help` for command-specific help",
     )
     parse_cp = subp.add_parser(
-        "cp", parents=[parent_parser], description="Copy data files from the cloud"
+        "cp", parents=[parent_parser], description="Copy data files from the cloud."
     )
     add_sources_arg(parse_cp).complete = shtab.DIR # type: ignore[attr-defined]
-    parse_cp.add_argument(
+    parse_cp.add_argument(
+        "output", type=str, help="Path to a directory or file to put data to"
+    )
     parse_cp.add_argument(
         "-f",
         "--force",
         default=False,
         action="store_true",
-        help="Force creating
+        help="Force creating files even if they already exist",
     )
     parse_cp.add_argument(
         "-r",
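Two details in this hunk are easy to miss: help=argparse.SUPPRESS keeps --debug-sql and --pdb parseable while hiding them from --help output, and dropping required=True from add_subparsers is what enables the no-command handling added in main() above. A small self-contained illustration (the names are stand-ins, not datachain's code):

import argparse

parser = argparse.ArgumentParser(prog="demo")
parser.add_argument("--anon", action="store_true",
                    help="Use anonymous access to storage")
parser.add_argument("--debug-sql", action="store_true", default=False,
                    help=argparse.SUPPRESS)   # accepted, but not listed

parser.print_help()                        # shows --anon only (plus -h)
args = parser.parse_args(["--debug-sql"])  # still parses fine
print(args.debug_sql)                      # True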
@@ -90,10 +94,12 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
     )
 
     parse_clone = subp.add_parser(
-        "clone", parents=[parent_parser], description="Copy data files from the cloud"
+        "clone", parents=[parent_parser], description="Copy data files from the cloud."
     )
     add_sources_arg(parse_clone).complete = shtab.DIR # type: ignore[attr-defined]
-    parse_clone.add_argument(
+    parse_clone.add_argument(
+        "output", type=str, help="Path to a directory or file to put data to"
+    )
     parse_clone.add_argument(
         "-f",
         "--force",
@@ -121,40 +127,30 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
         action="store_true",
         help="Do not copy files, just create a dataset",
     )
-    parse_clone.add_argument(
-        "--edatachain",
-        default=False,
-        action="store_true",
-        help="Create a .edatachain file",
-    )
-    parse_clone.add_argument(
-        "--edatachain-file",
-        help="Use a different filename for the resulting .edatachain file",
-    )
 
-
+    add_auth_parser(subp, parent_parser)
     add_jobs_parser(subp, parent_parser)
 
     datasets_parser = subp.add_parser(
         "dataset",
         aliases=["ds"],
         parents=[parent_parser],
-        description="Commands for managing
+        description="Commands for managing datasets.",
     )
     datasets_subparser = datasets_parser.add_subparsers(
         dest="datasets_cmd",
-        help="Use `datachain
+        help="Use `datachain dataset CMD --help` to display command-specific help",
     )
 
     parse_pull = datasets_subparser.add_parser(
         "pull",
         parents=[parent_parser],
-        description="Pull specific dataset version from
+        description="Pull specific dataset version from Studio.",
     )
     parse_pull.add_argument(
         "dataset",
         type=str,
-        help="Name and version of remote dataset created in
+        help="Name and version of remote dataset created in Studio",
     )
     parse_pull.add_argument("-o", "--output", type=str, help="Output")
     parse_pull.add_argument(
@@ -178,16 +174,7 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
         action="store_true",
         help="Copy actual files after pulling remote dataset into local DB",
     )
-    parse_pull.add_argument(
-        "--edatachain",
-        default=False,
-        action="store_true",
-        help="Create .edatachain file",
-    )
-    parse_pull.add_argument(
-        "--edatachain-file",
-        help="Use a different filename for the resulting .edatachain file",
-    )
+
     parse_pull.add_argument(
         "--local-name",
         action="store",
@@ -202,7 +189,7 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
     )
 
     parse_edit_dataset = datasets_subparser.add_parser(
-        "edit", parents=[parent_parser], description="Edit dataset metadata"
+        "edit", parents=[parent_parser], description="Edit dataset metadata."
     )
     parse_edit_dataset.add_argument("name", type=str, help="Dataset name")
     parse_edit_dataset.add_argument(
@@ -244,41 +231,41 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
         "--team",
         action="store",
         default=None,
-        help="The team to edit a dataset. By default, it will use team from config
+        help="The team to edit a dataset. By default, it will use team from config",
     )
 
-
-        "ls", parents=[parent_parser], description="List datasets"
+    datasets_ls_parser = datasets_subparser.add_parser(
+        "ls", parents=[parent_parser], description="List datasets."
     )
-
+    datasets_ls_parser.add_argument(
         "--studio",
         action="store_true",
         default=False,
         help="List the files in the Studio",
     )
-
+    datasets_ls_parser.add_argument(
         "-L",
         "--local",
         action="store_true",
         default=False,
         help="List local files only",
     )
-
+    datasets_ls_parser.add_argument(
         "-a",
         "--all",
         action="store_true",
         default=True,
         help="List all files including hidden files",
     )
-
+    datasets_ls_parser.add_argument(
         "--team",
         action="store",
         default=None,
-        help="The team to list datasets for. By default, it will use team from config
+        help="The team to list datasets for. By default, it will use team from config",
     )
 
     rm_dataset_parser = datasets_subparser.add_parser(
-        "rm", parents=[parent_parser], description="
+        "rm", parents=[parent_parser], description="Remove dataset.", aliases=["remove"]
     )
     rm_dataset_parser.add_argument("name", type=str, help="Dataset name")
     rm_dataset_parser.add_argument(
@@ -292,7 +279,7 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
         "--force",
         default=False,
         action=BooleanOptionalAction,
-        help="Force delete registered dataset with all of
+        help="Force delete registered dataset with all of its versions",
     )
     rm_dataset_parser.add_argument(
         "--studio",
@@ -318,13 +305,11 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
         "--team",
         action="store",
         default=None,
-        help="The team to delete a dataset. By default, it will use team from config
+        help="The team to delete a dataset. By default, it will use team from config",
     )
 
     dataset_stats_parser = datasets_subparser.add_parser(
-        "stats",
-        parents=[parent_parser],
-        description="Shows basic dataset stats",
+        "stats", parents=[parent_parser], description="Show basic dataset statistics."
     )
     dataset_stats_parser.add_argument("name", type=str, help="Dataset name")
     dataset_stats_parser.add_argument(
@@ -349,7 +334,7 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
     )
 
     parse_ls = subp.add_parser(
-        "ls", parents=[parent_parser], description="List storage contents"
+        "ls", parents=[parent_parser], description="List storage contents."
     )
     add_sources_arg(parse_ls, nargs="*")
     parse_ls.add_argument(
@@ -357,7 +342,7 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
         "--long",
         action="count",
         default=0,
-        help="List files in
+        help="List files in long format",
     )
     parse_ls.add_argument(
         "--studio",
@@ -383,11 +368,11 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
         "--team",
         action="store",
         default=None,
-        help="The team to list datasets for. By default, it will use team from config
+        help="The team to list datasets for. By default, it will use team from config",
     )
 
     parse_du = subp.add_parser(
-        "du", parents=[parent_parser], description="Display space usage"
+        "du", parents=[parent_parser], description="Display space usage."
     )
     add_sources_arg(parse_du)
     parse_du.add_argument(
@@ -405,8 +390,8 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
         type=int,
         metavar="N",
         help=(
-            "Display sizes
-            "
+            "Display sizes up to N directory levels deep "
+            "(default: 0, summarize provided directory only)"
         ),
     )
     parse_du.add_argument(
@@ -417,32 +402,32 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
     )
 
     parse_find = subp.add_parser(
-        "find", parents=[parent_parser], description="Search in a directory hierarchy"
+        "find", parents=[parent_parser], description="Search in a directory hierarchy."
     )
     add_sources_arg(parse_find)
     parse_find.add_argument(
         "--name",
         type=str,
         action="append",
-        help="
+        help="Match filename pattern",
     )
     parse_find.add_argument(
         "--iname",
         type=str,
         action="append",
-        help="
+        help="Match filename pattern (case insensitive)",
     )
     parse_find.add_argument(
         "--path",
         type=str,
         action="append",
-        help="Path to match pattern
+        help="Path to match pattern",
     )
     parse_find.add_argument(
         "--ipath",
         type=str,
         action="append",
-        help="Like -path but case insensitive
+        help="Like -path but case insensitive",
     )
     parse_find.add_argument(
         "--size",
@@ -450,7 +435,7 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
         help=(
             "Filter by size (+ is greater or equal, - is less or equal). "
             "Specified size is in bytes, or use a suffix like K, M, G for "
-            "kilobytes, megabytes, gigabytes, etc
+            "kilobytes, megabytes, gigabytes, etc"
         ),
     )
     parse_find.add_argument(
@@ -470,14 +455,14 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
     )
 
     parse_index = subp.add_parser(
-        "index", parents=[parent_parser], description="Index storage location"
+        "index", parents=[parent_parser], description="Index storage location."
     )
     add_sources_arg(parse_index)
 
     show_parser = subp.add_parser(
         "show",
         parents=[parent_parser],
-        description="Create a new dataset with a query script",
+        description="Create a new dataset with a query script.",
     )
     show_parser.add_argument("name", type=str, help="Dataset name")
     show_parser.add_argument(
@@ -493,7 +478,7 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
     query_parser = subp.add_parser(
         "query",
         parents=[parent_parser],
-        description="Create a new dataset with a query script",
+        description="Create a new dataset with a query script.",
     )
     query_parser.add_argument(
         "script", metavar="<script.py>", type=str, help="Filepath for script"
@@ -507,7 +492,7 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
         metavar="N",
         help=(
             "Use multiprocessing to run any query script UDFs with N worker processes. "
-            "N defaults to the CPU count
+            "N defaults to the CPU count"
         ),
     )
     query_parser.add_argument(
@@ -520,10 +505,12 @@ def get_parser() -> ArgumentParser: # noqa: PLR0915
     )
 
     subp.add_parser(
-        "clear-cache",
+        "clear-cache",
+        parents=[parent_parser],
+        description="Clear the local file cache.",
     )
     subp.add_parser(
-        "gc", parents=[parent_parser], description="Garbage collect temporary tables"
+        "gc", parents=[parent_parser], description="Garbage collect temporary tables."
    )
 
     subp.add_parser("internal-run-udf", parents=[parent_parser])
@@ -536,12 +523,12 @@ def add_completion_parser(subparsers, parents):
     parser = subparsers.add_parser(
         "completion",
         parents=parents,
-        description="Output shell completion script",
+        description="Output shell completion script.",
     )
     parser.add_argument(
         "-s",
         "--shell",
-        help="Shell syntax for completions
+        help="Shell syntax for completions",
         default="bash",
         choices=shtab.SUPPORTED_SHELLS,
     )
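Nearly every subcommand above is built with parents=[parent_parser], argparse's mechanism for defining shared flags once; note the add_help=False on the parent, which avoids a duplicate -h. A compact sketch of how that inheritance behaves, with made-up commands:

from argparse import ArgumentParser

parent = ArgumentParser(add_help=False)
parent.add_argument("-v", "--verbose", action="count", default=0, help="Be verbose")

root = ArgumentParser(prog="demo")
subp = root.add_subparsers(dest="command")
subp.add_parser("ls", parents=[parent])  # `demo ls -v` now works
subp.add_parser("du", parents=[parent])  # ...and so does `demo du -v`

args = root.parse_args(["ls", "-v", "-v"])
print(args.command, args.verbose)        # ls 2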
datachain/cli/parser/job.py
CHANGED
@@ -1,19 +1,16 @@
 def add_jobs_parser(subparsers, parent_parser) -> None:
-    jobs_help = "
-    jobs_description =
-        "This will help us to run, cancel and view the status of the job in Studio. "
-    )
+    jobs_help = "Manage jobs in Studio"
+    jobs_description = "Commands to manage job execution in Studio."
     jobs_parser = subparsers.add_parser(
         "job", parents=[parent_parser], description=jobs_description, help=jobs_help
     )
     jobs_subparser = jobs_parser.add_subparsers(
         dest="cmd",
-        help="Use `
-        required=True,
+        help="Use `datachain auth CMD --help` to display command-specific help",
     )
 
     studio_run_help = "Run a job in Studio"
-    studio_run_description = "
+    studio_run_description = "Run a job in Studio."
 
     studio_run_parser = jobs_subparser.add_parser(
         "run",
@@ -25,56 +22,56 @@ def add_jobs_parser(subparsers, parent_parser) -> None:
     studio_run_parser.add_argument(
         "query_file",
         action="store",
-        help="
+        help="Query file to run",
     )
 
     studio_run_parser.add_argument(
         "--team",
         action="store",
         default=None,
-        help="
+        help="Team to run job for (default: from config)",
     )
     studio_run_parser.add_argument(
         "--env-file",
         action="store",
-        help="File
+        help="File with environment variables for the job",
     )
 
     studio_run_parser.add_argument(
         "--env",
         nargs="+",
-        help="Environment
+        help="Environment variables in KEY=VALUE format",
     )
 
     studio_run_parser.add_argument(
         "--workers",
         type=int,
-        help="Number of workers
+        help="Number of workers for the job",
     )
     studio_run_parser.add_argument(
         "--files",
         nargs="+",
-        help="
+        help="Additional files to include in the job",
     )
     studio_run_parser.add_argument(
         "--python-version",
         action="store",
-        help="Python version
+        help="Python version for the job (e.g., 3.9, 3.10, 3.11)",
     )
     studio_run_parser.add_argument(
         "--req-file",
         action="store",
-        help="
+        help="Python requirements file",
     )
 
     studio_run_parser.add_argument(
         "--req",
         nargs="+",
-        help="Python package
+        help="Python package requirements",
     )
 
     studio_cancel_help = "Cancel a job in Studio"
-    studio_cancel_description = "
+    studio_cancel_description = "Cancel a running job in Studio."
 
     studio_cancel_parser = jobs_subparser.add_parser(
         "cancel",
@@ -86,19 +83,17 @@ def add_jobs_parser(subparsers, parent_parser) -> None:
     studio_cancel_parser.add_argument(
         "job_id",
         action="store",
-        help="
+        help="Job ID to cancel",
     )
     studio_cancel_parser.add_argument(
         "--team",
         action="store",
         default=None,
-        help="
+        help="Team to cancel job for (default: from config)",
     )
 
-    studio_log_help = "Show
-    studio_log_description =
-        "This will display the logs and latest status of jobs in Studio"
-    )
+    studio_log_help = "Show job logs and status in Studio"
+    studio_log_description = "Display logs and current status of jobs in Studio."
 
     studio_log_parser = jobs_subparser.add_parser(
         "logs",
@@ -110,11 +105,11 @@ def add_jobs_parser(subparsers, parent_parser) -> None:
     studio_log_parser.add_argument(
         "job_id",
         action="store",
-        help="
+        help="Job ID to show logs for",
     )
     studio_log_parser.add_argument(
         "--team",
         action="store",
         default=None,
-        help="
+        help="Team to check logs for (default: from config)",
     )
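A hedged sketch of how the repeated-value flags above behave: nargs="+" collects one or more tokens, so --env can take several KEY=VALUE pairs after a single flag. The dict-splitting step is illustrative only; it is not necessarily how datachain consumes args.env:

from argparse import ArgumentParser

p = ArgumentParser(prog="job-run-demo")  # stand-in, not datachain's parser
p.add_argument("query_file", help="Query file to run")
p.add_argument("--env", nargs="+", default=[],
               help="Environment variables in KEY=VALUE format")
p.add_argument("--workers", type=int, help="Number of workers for the job")

args = p.parse_args(["query.py", "--env", "A=1", "B=2", "--workers", "4"])
env = dict(pair.split("=", 1) for pair in args.env)
print(args.query_file, env, args.workers)  # query.py {'A': '1', 'B': '2'} 4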