metaflow 2.11.14__py2.py3-none-any.whl → 2.11.15__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaflow/cli.py +0 -120
- metaflow/datastore/datastore_set.py +1 -1
- metaflow/datastore/flow_datastore.py +32 -6
- metaflow/datastore/task_datastore.py +50 -0
- metaflow/plugins/__init__.py +1 -0
- metaflow/plugins/aws/batch/batch_cli.py +6 -4
- metaflow/plugins/datatools/s3/s3.py +8 -8
- metaflow/plugins/logs_cli.py +358 -0
- metaflow/version.py +1 -1
- {metaflow-2.11.14.dist-info → metaflow-2.11.15.dist-info}/METADATA +2 -2
- {metaflow-2.11.14.dist-info → metaflow-2.11.15.dist-info}/RECORD +15 -14
- {metaflow-2.11.14.dist-info → metaflow-2.11.15.dist-info}/LICENSE +0 -0
- {metaflow-2.11.14.dist-info → metaflow-2.11.15.dist-info}/WHEEL +0 -0
- {metaflow-2.11.14.dist-info → metaflow-2.11.15.dist-info}/entry_points.txt +0 -0
- {metaflow-2.11.14.dist-info → metaflow-2.11.15.dist-info}/top_level.txt +0 -0
metaflow/cli.py
CHANGED
@@ -287,126 +287,6 @@ def dump(obj, input_path, private=None, max_value_size=None, include=None, file=
     echo("Artifacts written to *%s*" % file)
 
 
-@cli.command(
-    help="Show stdout/stderr produced by a task or all tasks in a step. "
-    "The format for input-path is either <run_id>/<step_name> or "
-    "<run_id>/<step_name>/<task_id>."
-)
-@click.argument("input-path")
-@click.option(
-    "--stdout/--no-stdout",
-    default=False,
-    show_default=True,
-    help="Show stdout of the task.",
-)
-@click.option(
-    "--stderr/--no-stderr",
-    default=False,
-    show_default=True,
-    help="Show stderr of the task.",
-)
-@click.option(
-    "--both/--no-both",
-    default=True,
-    show_default=True,
-    help="Show both stdout and stderr of the task.",
-)
-@click.option(
-    "--timestamps/--no-timestamps",
-    default=False,
-    show_default=True,
-    help="Show timestamps.",
-)
-@click.pass_obj
-def logs(obj, input_path, stdout=None, stderr=None, both=None, timestamps=False):
-    types = set()
-    if stdout:
-        types.add("stdout")
-        both = False
-    if stderr:
-        types.add("stderr")
-        both = False
-    if both:
-        types.update(("stdout", "stderr"))
-
-    streams = list(sorted(types, reverse=True))
-
-    # Pathspec can either be run_id/step_name or run_id/step_name/task_id.
-    parts = input_path.split("/")
-    if len(parts) == 2:
-        run_id, step_name = parts
-        task_id = None
-    elif len(parts) == 3:
-        run_id, step_name, task_id = parts
-    else:
-        raise CommandException(
-            "input_path should either be run_id/step_name "
-            "or run_id/step_name/task_id"
-        )
-
-    datastore_set = TaskDataStoreSet(
-        obj.flow_datastore, run_id, steps=[step_name], allow_not_done=True
-    )
-    if task_id:
-        ds_list = [
-            TaskDataStore(
-                obj.flow_datastore,
-                run_id=run_id,
-                step_name=step_name,
-                task_id=task_id,
-                mode="r",
-                allow_not_done=True,
-            )
-        ]
-    else:
-        ds_list = list(datastore_set)  # get all tasks
-
-    if ds_list:
-
-        def echo_unicode(line, **kwargs):
-            click.secho(line.decode("UTF-8", errors="replace"), **kwargs)
-
-        # old style logs are non mflog-style logs
-        maybe_old_style = True
-        for ds in ds_list:
-            echo(
-                "Dumping logs of run_id=*{run_id}* "
-                "step=*{step}* task_id=*{task_id}*".format(
-                    run_id=ds.run_id, step=ds.step_name, task_id=ds.task_id
-                ),
-                fg="magenta",
-            )
-
-            for stream in streams:
-                echo(stream, bold=True)
-                logs = ds.load_logs(LOG_SOURCES, stream)
-                if any(data for _, data in logs):
-                    # attempt to read new, mflog-style logs
-                    for line in mflog.merge_logs([blob for _, blob in logs]):
-                        if timestamps:
-                            ts = mflog.utc_to_local(line.utc_tstamp)
-                            tstamp = ts.strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
-                            click.secho(tstamp + " ", fg=LOGGER_TIMESTAMP, nl=False)
-                        echo_unicode(line.msg)
-                    maybe_old_style = False
-                elif maybe_old_style:
-                    # if they are not available, we may be looking at
-                    # a legacy run (unless we have seen new-style data already
-                    # for another stream). This return an empty string if
-                    # nothing is found
-                    log = ds.load_log_legacy(stream)
-                    if log and timestamps:
-                        raise CommandException(
-                            "We can't show --timestamps for old runs. Sorry!"
-                        )
-                    echo_unicode(log, nl=False)
-    else:
-        raise CommandException(
-            "No Tasks found at the given path -- "
-            "either none exist or none have started yet"
-        )
-
-
 # TODO - move step and init under a separate 'internal' subcommand
 
 
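In 2.11.15 the `logs` command leaves the core CLI entirely; the same behavior reappears as the default `show` subcommand of a new `logs` group provided by metaflow/plugins/logs_cli.py (added below). Since `show` is the group's default command, existing invocations should keep working unchanged; a rough sketch, where the flow file name and pathspecs are placeholders:

    # old-style call, now routed to the default `show` subcommand
    python myflow.py logs 1234/start

    # explicit form, including the new --attempt selector
    python myflow.py logs show 1234/start/5678 --stderr --attempt 0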
metaflow/datastore/datastore_set.py
CHANGED
@@ -22,7 +22,7 @@ class TaskDataStoreSet(object):
         prefetch_data_artifacts=None,
         allow_not_done=False,
     ):
-        self.task_datastores = flow_datastore.get_latest_task_datastores(
+        self.task_datastores = flow_datastore.get_task_datastores(
             run_id, steps=steps, pathspecs=pathspecs, allow_not_done=allow_not_done
         )
 
metaflow/datastore/flow_datastore.py
CHANGED
@@ -67,8 +67,15 @@ class FlowDataStore(object):
     def datastore_root(self):
         return self._storage_impl.datastore_root
 
-    def get_latest_task_datastores(
-        self, run_id=None, steps=None, pathspecs=None, allow_not_done=False
+    def get_task_datastores(
+        self,
+        run_id=None,
+        steps=None,
+        pathspecs=None,
+        allow_not_done=False,
+        attempt=None,
+        include_prior=False,
+        mode="r",
     ):
         """
         Return a list of TaskDataStore for a subset of the tasks.
@@ -93,6 +100,12 @@ class FlowDataStore(object):
         allow_not_done : bool, optional
             If True, returns the latest attempt of a task even if that attempt
             wasn't marked as done, by default False
+        attempt : int, optional
+            Attempt number of the tasks to return. If not provided, returns latest attempt.
+        include_prior : boolean, default False
+            If True, returns all attempts up to and including attempt.
+        mode : str, default "r"
+            Mode to initialize the returned TaskDataStores in.
 
         Returns
         -------
@@ -126,8 +139,13 @@ class FlowDataStore(object):
             if task.is_file is False
         ]
         urls = []
+        # parse content urls for specific attempt only, or for all attempts in max range
+        attempt_range = range(metaflow_config.MAX_ATTEMPTS)
+        # we have no reason to check for attempts greater than MAX_ATTEMPTS, as they do not exist.
+        if attempt is not None and attempt <= metaflow_config.MAX_ATTEMPTS - 1:
+            attempt_range = range(attempt + 1) if include_prior else [attempt]
         for task_url in task_urls:
-            for attempt in range(metaflow_config.MAX_ATTEMPTS):
+            for attempt in attempt_range:
                 for suffix in [
                     TaskDataStore.METADATA_DATA_SUFFIX,
                     TaskDataStore.METADATA_ATTEMPT_SUFFIX,
@@ -168,11 +186,19 @@ class FlowDataStore(object):
             for (run, step, task), attempt in latest_started_attempts.items()
         )
         if allow_not_done:
-            latest_to_fetch = latest_started_attempts
+            latest_to_fetch = (
+                done_attempts.union(latest_started_attempts)
+                if include_prior
+                else latest_started_attempts
+            )
         else:
-            latest_to_fetch = latest_started_attempts & done_attempts
+            latest_to_fetch = (
+                done_attempts
+                if include_prior
+                else (latest_started_attempts & done_attempts)
+            )
         latest_to_fetch = [
-            (v[0], v[1], v[2], v[3], data_objs.get(v), "r", allow_not_done)
+            (v[0], v[1], v[2], v[3], data_objs.get(v), mode, allow_not_done)
             for v in latest_to_fetch
         ]
         return list(itertools.starmap(self.get_task_datastore, latest_to_fetch))
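The renamed get_task_datastores gains attempt, include_prior, and mode parameters, so callers can target one specific attempt, all attempts up to it, or just the latest. A minimal standalone sketch of the attempt-range selection above (the literal 4 stands in for metaflow_config.MAX_ATTEMPTS):

    MAX_ATTEMPTS = 4  # stand-in for metaflow_config.MAX_ATTEMPTS

    def select_attempt_range(attempt=None, include_prior=False):
        # default: scan every possible attempt slot
        attempt_range = range(MAX_ATTEMPTS)
        if attempt is not None and attempt <= MAX_ATTEMPTS - 1:
            # a specific attempt, optionally with everything before it
            attempt_range = range(attempt + 1) if include_prior else [attempt]
        return list(attempt_range)

    print(select_attempt_range())                               # [0, 1, 2, 3]
    print(select_attempt_range(attempt=2))                      # [2]
    print(select_attempt_range(attempt=2, include_prior=True))  # [0, 1, 2]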
metaflow/datastore/task_datastore.py
CHANGED
@@ -173,6 +173,26 @@ class TaskDataStore(object):
             if data_obj is not None:
                 self._objects = data_obj.get("objects", {})
                 self._info = data_obj.get("info", {})
+        elif self._mode == "d":
+            self._objects = {}
+            self._info = {}
+
+            if self._attempt is None:
+                for i in range(metaflow_config.MAX_ATTEMPTS):
+                    check_meta = self._metadata_name_for_attempt(
+                        self.METADATA_ATTEMPT_SUFFIX, i
+                    )
+                    if self.has_metadata(check_meta, add_attempt=False):
+                        self._attempt = i
+
+            # Do not allow destructive operations on the datastore if attempt is still in flight
+            # and we explicitly did not allow operating on running tasks.
+            if not allow_not_done and not self.has_metadata(self.METADATA_DONE_SUFFIX):
+                raise DataException(
+                    "No completed attempts of the task was found for task '%s'"
+                    % self._path
+                )
+
         else:
             raise DataException("Unknown datastore mode: '%s'" % self._mode)
 
@@ -750,6 +770,36 @@ class TaskDataStore(object):
             to_store_dict[n] = data
         self._save_file(to_store_dict)
 
+    @require_mode("d")
+    def scrub_logs(self, logsources, stream, attempt_override=None):
+        path_logsources = {
+            self._metadata_name_for_attempt(
+                self._get_log_location(s, stream),
+                attempt_override=attempt_override,
+            ): s
+            for s in logsources
+        }
+
+        # Legacy log paths
+        legacy_log = self._metadata_name_for_attempt(
+            "%s.log" % stream, attempt_override
+        )
+        path_logsources[legacy_log] = stream
+
+        existing_paths = [
+            path
+            for path in path_logsources.keys()
+            if self.has_metadata(path, add_attempt=False)
+        ]
+
+        # Replace log contents with [REDACTED source stream]
+        to_store_dict = {
+            path: bytes("[REDACTED %s %s]" % (path_logsources[path], stream), "utf-8")
+            for path in existing_paths
+        }
+
+        self._save_file(to_store_dict, add_attempt=False, allow_overwrite=True)
+
     @require_mode("r")
     def load_log_legacy(self, stream, attempt_override=None):
         """
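The new "d" (destructive) mode resolves which attempt to operate on when none is given and refuses to act on a task that has no done-marker unless allow_not_done is set. scrub_logs then overwrites each existing log blob in place with a redaction marker instead of deleting it. A sketch of how the redaction payloads are built; the path keys here are illustrative placeholders, not metaflow's actual log file names:

    path_logsources = {"0.task.stderr.log": "task", "stderr.log": "stderr"}
    stream = "stderr"
    to_store_dict = {
        path: bytes("[REDACTED %s %s]" % (source, stream), "utf-8")
        for path, source in path_logsources.items()
    }
    # {'0.task.stderr.log': b'[REDACTED task stderr]',
    #  'stderr.log': b'[REDACTED stderr stderr]'}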
metaflow/plugins/aws/batch/batch_cli.py
CHANGED
@@ -10,7 +10,7 @@ from metaflow.exception import CommandException, METAFLOW_EXIT_DISALLOW_RETRY
 from metaflow.metadata.util import sync_local_metadata_from_datastore
 from metaflow.metaflow_config import DATASTORE_LOCAL_DIR
 from metaflow.mflog import TASK_LOG_SOURCE
-
+from metaflow.unbounded_foreach import UBF_CONTROL, UBF_TASK
 from .batch import Batch, BatchKilledException
 
 
@@ -150,8 +150,10 @@ def kill(ctx, run_id, user, my_runs):
 @click.option("--tmpfs-tempdir", is_flag=True, help="tmpfs requirement for AWS Batch.")
 @click.option("--tmpfs-size", help="tmpfs requirement for AWS Batch.")
 @click.option("--tmpfs-path", help="tmpfs requirement for AWS Batch.")
-#
-@click.option(
+# NOTE: ubf-context is not explicitly used, but @parallel decorator tries to pass this so keep it for now
+@click.option(
+    "--ubf-context", default=None, type=click.Choice(["none", UBF_CONTROL, UBF_TASK])
+)
 @click.option("--host-volumes", multiple=True)
 @click.option("--efs-volumes", multiple=True)
 @click.option(
@@ -344,7 +346,7 @@ def step(
             log_options=log_options,
             num_parallel=num_parallel,
         )
-    except Exception
+    except Exception:
         traceback.print_exc()
         _sync_metadata()
         sys.exit(METAFLOW_EXIT_DISALLOW_RETRY)
metaflow/plugins/datatools/s3/s3.py
CHANGED
@@ -1245,12 +1245,12 @@ class S3(object):
 
         def _store():
             for key_obj in key_objs:
-                if isinstance(key_obj, tuple):
-                    key = key_obj[0]
-                    obj = key_obj[1]
-                else:
+                if isinstance(key_obj, S3PutObject):
                     key = key_obj.key
                     obj = key_obj.value
+                else:
+                    key = key_obj[0]
+                    obj = key_obj[1]
                 store_info = {
                     "key": key,
                     "content_type": getattr(key_obj, "content_type", None),
@@ -1319,12 +1319,12 @@ class S3(object):
 
         def _check():
             for key_path in key_paths:
-                if isinstance(key_path, tuple):
-                    key = key_path[0]
-                    path = key_path[1]
-                else:
+                if isinstance(key_path, S3PutObject):
                     key = key_path.key
                     path = key_path.path
+                else:
+                    key = key_path[0]
+                    path = key_path[1]
                 store_info = {
                     "key": key,
                     "content_type": getattr(key_path, "content_type", None),
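Swapping the isinstance checks matters because S3PutObject is a namedtuple, and any namedtuple instance also satisfies isinstance(x, tuple); with the tuple check first, S3PutObject inputs fell into the positional-indexing branch. A self-contained demonstration of the pitfall (Item is a stand-in type, not metaflow's S3PutObject):

    from collections import namedtuple

    Item = namedtuple("Item", ["key", "value", "path"])  # stand-in for S3PutObject
    obj = Item(key="a", value=b"data", path=None)

    print(isinstance(obj, tuple))  # True: namedtuples are tuples, so a
                                   # tuple-first check shadows this branch
    if isinstance(obj, Item):      # check the specific type first, as the fix does
        key, value = obj.key, obj.value
    else:
        key, value = obj[0], obj[1]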
metaflow/plugins/logs_cli.py
ADDED
@@ -0,0 +1,358 @@
+from metaflow._vendor import click
+from metaflow.cli import LOGGER_TIMESTAMP
+
+from ..exception import CommandException
+from ..datastore import TaskDataStoreSet, TaskDataStore
+
+
+from ..mflog import mflog, LOG_SOURCES
+
+# main motivation from https://github.com/pallets/click/issues/430
+# in order to support a default command being called for a Click group.
+#
+# NOTE: We need this in order to not introduce breaking changes to existing CLI, as we wanted to
+# nest both existing `logs` and the new `logs scrub` under a shared group, but `logs` already has
+# a well defined behavior of showing the logs.
+class CustomGroup(click.Group):
+    def __init__(self, name=None, commands=None, default_cmd=None, **attrs):
+        super(CustomGroup, self).__init__(name, commands, **attrs)
+        self.default_cmd = default_cmd
+
+    def get_command(self, ctx, cmd_name):
+        if cmd_name not in self.list_commands(ctx):
+            # input from the CLI does not match a command, so we pass that
+            # as the args to the default command instead.
+            ctx.passed_cmd = cmd_name
+            cmd_name = self.default_cmd
+        return super(CustomGroup, self).get_command(ctx, cmd_name)
+
+    def parse_args(self, ctx, args):
+        # We first try to parse args as is, to determine whether we need to fall back to the default commmand
+        # if any options are supplied, the parse will fail, as the group does not support the options.
+        # In this case we fallback to the default command, inserting that as the first arg and parsing again.
+        # copy args as trying to parse will destroy them.
+        original_args = list(args)
+        try:
+            super().parse_args(ctx, args)
+            args_parseable = True
+        except Exception:
+            args_parseable = False
+        if not args or not args_parseable:
+            original_args.insert(0, self.default_cmd)
+            return super().parse_args(ctx, original_args)
+
+    def resolve_command(self, ctx, args):
+        cmd_name, cmd_obj, args = super(CustomGroup, self).resolve_command(ctx, args)
+        passed_cmd = getattr(ctx, "passed_cmd", None)
+        if passed_cmd is not None:
+            args.insert(0, passed_cmd)
+
+        return cmd_name, cmd_obj, args
+
+    def format_commands(self, ctx, formatter):
+        formatter = CustomFormatter(self.default_cmd, formatter)
+        return super(CustomGroup, self).format_commands(ctx, formatter)
+
+
+class CustomFormatter:
+    def __init__(self, default_cmd, original_formatter) -> None:
+        self.default_cmd = default_cmd
+        self.formatter = original_formatter
+
+    def __getattr__(self, name):
+        return getattr(self.formatter, name)
+
+    def write_dl(self, rows):
+        def _format(dup):
+            cmd, help = dup
+            if cmd == self.default_cmd:
+                cmd = cmd + " [Default]"
+            return (cmd, help)
+
+        rows = [_format(dup) for dup in rows]
+
+        return self.formatter.write_dl(rows)
+
+
+@click.group()
+def cli():
+    pass
+
+
+@cli.group(cls=CustomGroup, help="Commands related to logs", default_cmd="show")
+@click.pass_context
+def logs(ctx):
+    # the logger is configured in cli.py
+    global echo
+    echo = ctx.obj.echo
+
+
+@logs.command(
+    help="Show stdout/stderr produced by a task or all tasks in a step. "
+    "The format for input-path is either <run_id>/<step_name> or "
+    "<run_id>/<step_name>/<task_id>."
+)
+@click.argument("input-path")
+@click.option(
+    "--stdout/--no-stdout",
+    default=False,
+    show_default=True,
+    help="Show stdout of the task.",
+)
+@click.option(
+    "--stderr/--no-stderr",
+    default=False,
+    show_default=True,
+    help="Show stderr of the task.",
+)
+@click.option(
+    "--both/--no-both",
+    default=True,
+    show_default=True,
+    help="Show both stdout and stderr of the task.",
+)
+@click.option(
+    "--timestamps/--no-timestamps",
+    default=False,
+    show_default=True,
+    help="Show timestamps.",
+)
+@click.option(
+    "--attempt",
+    default=None,
+    type=int,
+    show_default=False,
+    help="Attempt number of a task to show, defaults to the latest attempt.",
+)
+@click.pass_obj
+def show(
+    obj, input_path, stdout=None, stderr=None, both=None, timestamps=False, attempt=None
+):
+    types = set()
+    if stdout:
+        types.add("stdout")
+        both = False
+    if stderr:
+        types.add("stderr")
+        both = False
+    if both:
+        types.update(("stdout", "stderr"))
+
+    streams = list(sorted(types, reverse=True))
+
+    # Pathspec can either be run_id/step_name or run_id/step_name/task_id.
+    parts = input_path.split("/")
+    if len(parts) == 2:
+        run_id, step_name = parts
+        task_id = None
+    elif len(parts) == 3:
+        run_id, step_name, task_id = parts
+    else:
+        raise CommandException(
+            "input_path should either be run_id/step_name "
+            "or run_id/step_name/task_id"
+        )
+
+    datastore_set = TaskDataStoreSet(
+        obj.flow_datastore, run_id, steps=[step_name], allow_not_done=True
+    )
+    if task_id:
+        ds_list = [
+            TaskDataStore(
+                obj.flow_datastore,
+                run_id=run_id,
+                step_name=step_name,
+                task_id=task_id,
+                mode="r",
+                allow_not_done=True,
+            )
+        ]
+    else:
+        ds_list = list(datastore_set)  # get all tasks
+
+    if ds_list:
+
+        def echo_unicode(line, **kwargs):
+            click.secho(line.decode("UTF-8", errors="replace"), **kwargs)
+
+        # old style logs are non mflog-style logs
+        maybe_old_style = True
+        for ds in ds_list:
+            echo(
+                "Dumping logs of run_id=*{run_id}* "
+                "step=*{step}* task_id=*{task_id}*".format(
+                    run_id=ds.run_id, step=ds.step_name, task_id=ds.task_id
+                ),
+                fg="magenta",
+            )
+
+            for stream in streams:
+                echo(stream, bold=True)
+                logs = ds.load_logs(LOG_SOURCES, stream, attempt_override=attempt)
+                if any(data for _, data in logs):
+                    # attempt to read new, mflog-style logs
+                    for line in mflog.merge_logs([blob for _, blob in logs]):
+                        if timestamps:
+                            ts = mflog.utc_to_local(line.utc_tstamp)
+                            tstamp = ts.strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
+                            click.secho(tstamp + " ", fg=LOGGER_TIMESTAMP, nl=False)
+                        echo_unicode(line.msg)
+                    maybe_old_style = False
+                elif maybe_old_style:
+                    # if they are not available, we may be looking at
+                    # a legacy run (unless we have seen new-style data already
+                    # for another stream). This return an empty string if
+                    # nothing is found
+                    log = ds.load_log_legacy(stream, attempt_override=attempt)
+                    if log and timestamps:
+                        raise CommandException(
+                            "We can't show --timestamps for old runs. Sorry!"
+                        )
+                    echo_unicode(log, nl=False)
+    else:
+        raise CommandException(
+            "No Tasks found at the given path -- "
+            "either none exist or none have started yet"
+        )
+
+
+@logs.command(
+    help="Scrub stdout/stderr produced by a task or all tasks in a step. "
+    "The format for input-path is either <run_id>/<step_name> or "
+    "<run_id>/<step_name>/<task_id>."
+)
+@click.argument("input-path")
+@click.option(
+    "--stdout/--no-stdout",
+    default=False,
+    show_default=True,
+    help="Scrub stdout of the step or task.",
+)
+@click.option(
+    "--stderr/--no-stderr",
+    default=False,
+    show_default=True,
+    help="Scrub stderr of the step or task.",
+)
+@click.option(
+    "--both/--no-both",
+    default=True,
+    show_default=True,
+    help="Scrub both stdout and stderr of the step or task.",
+)
+@click.option(
+    "--attempt",
+    default=None,
+    type=int,
+    show_default=False,
+    help="Attempt number of a task to scrub, defaults to the latest attempt.",
+)
+@click.option(
+    "--latest/--all",
+    default=True,
+    show_default=False,
+    help="Scrub latest/all attempts of a step or task",
+)
+@click.option(
+    "--include-not-done",
+    default=False,
+    show_default=False,
+    is_flag=True,
+    help="Also scrub steps or tasks that are not done. Use this for tasks that did not finish correctly, and could not otherwise be scrubbed.",
+)
+@click.pass_obj
+def scrub(
+    obj,
+    input_path,
+    stdout=None,
+    stderr=None,
+    both=None,
+    attempt=None,
+    latest=None,
+    include_not_done=None,
+):
+    types = set()
+    if stdout:
+        types.add("stdout")
+        both = False
+    if stderr:
+        types.add("stderr")
+        both = False
+    if both:
+        types.update(("stdout", "stderr"))
+
+    streams = list(sorted(types, reverse=True))
+
+    # Pathspec can either be run_id/step_name or run_id/step_name/task_id.
+    parts = input_path.split("/")
+    if len(parts) == 2:
+        run_id, step_name = parts
+        task_id = None
+    elif len(parts) == 3:
+        run_id, step_name, task_id = parts
+    else:
+        raise CommandException(
+            "input_path should either be run_id/step_name "
+            "or run_id/step_name/task_id"
+        )
+
+    if task_id:
+        if latest:
+            ds_list = obj.flow_datastore.get_task_datastores(
+                pathspecs=[input_path],
+                attempt=attempt,
+                mode="d",
+                allow_not_done=include_not_done,
+            )
+        else:
+            ds_list = obj.flow_datastore.get_task_datastores(
+                pathspecs=[input_path],
+                attempt=attempt,
+                mode="d",
+                allow_not_done=include_not_done,
+                include_prior=True,
+            )
+    else:
+        if latest:
+            ds_list = obj.flow_datastore.get_task_datastores(
+                run_id=run_id,
+                steps=[step_name],
+                attempt=attempt,
+                mode="d",
+                allow_not_done=include_not_done,
+            )
+        else:
+            ds_list = obj.flow_datastore.get_task_datastores(
+                run_id=run_id,
+                steps=[step_name],
+                attempt=attempt,
+                mode="d",
+                allow_not_done=include_not_done,
+                include_prior=True,
+            )
+
+    if ds_list:
+        for ds in ds_list:
+            failures = []
+            for stream in streams:
+                try:
+                    ds.scrub_logs(LOG_SOURCES, stream)
+                except Exception:
+                    failures.append(stream)
+            if failures:
+                obj.echo_always(
+                    "Failed to scrub %s - attempt %s : *%s*"
+                    % (ds.pathspec, ds.attempt, ",".join(failures))
+                )
+            else:
+                echo(
+                    "Logs have been scrubbed for %s - attempt %s"
+                    % (ds.pathspec, ds.attempt)
+                )
+
+    else:
+        raise CommandException(
+            "No Tasks found at the given path -- "
+            "either none exist or they have not finished yet.\n"
+            "If you know the task has finished, you can supply --include-not-done to force scrub it."
+        )
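Together, the plugin exposes a `logs` group whose default `show` subcommand preserves the previous CLI surface, while the new `scrub` subcommand drives the destructive datastore mode added above. A hedged usage sketch (flow file and pathspecs are placeholders):

    # scrub stderr of the latest attempt of every task in a step
    python myflow.py logs scrub 1234/train --stderr

    # scrub all attempts of a single task, even ones that never finished
    python myflow.py logs scrub 1234/train/5678 --all --include-not-done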
metaflow/version.py
CHANGED
@@ -1 +1 @@
-metaflow_version = "2.11.14"
+metaflow_version = "2.11.15"
{metaflow-2.11.14.dist-info → metaflow-2.11.15.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: metaflow
-Version: 2.11.14
+Version: 2.11.15
 Summary: Metaflow: More Data Science, Less Engineering
 Author: Metaflow Developers
 Author-email: help@metaflow.org
@@ -26,7 +26,7 @@ License-File: LICENSE
 Requires-Dist: requests
 Requires-Dist: boto3
 Provides-Extra: stubs
-Requires-Dist: metaflow-stubs ==2.11.14 ; extra == 'stubs'
+Requires-Dist: metaflow-stubs ==2.11.15 ; extra == 'stubs'
 
 
 
{metaflow-2.11.14.dist-info → metaflow-2.11.15.dist-info}/RECORD
CHANGED
@@ -1,7 +1,7 @@
 metaflow/R.py,sha256=bNcXXpGOe5wquzTRGyU0KS9gJMz7HceKjXxammYPUE0,3841
 metaflow/__init__.py,sha256=c3JqZ9CTMPhz9Rxh6Jrk35pPTDjAQYQ48Ps26D-I_Bg,5659
 metaflow/cards.py,sha256=tP1_RrtmqdFh741pqE4t98S7SA0MtGRlGvRICRZF1Mg,426
-metaflow/cli.py,sha256=
+metaflow/cli.py,sha256=2igissDxlfN7f3Bfm0urjL7xtUJJE6jasEJir3OJfsw,31392
 metaflow/cli_args.py,sha256=lcgBGNTvfaiPxiUnejAe60Upt9swG6lRy1_3OqbU6MY,2616
 metaflow/clone_util.py,sha256=ar4jSZt2aTd4monBpkIQmcLcsOd0relAB42qTUGt2j8,1810
 metaflow/cmd_with_io.py,sha256=kl53HkAIyv0ecpItv08wZYczv7u3msD1VCcciqigqf0,588
@@ -34,7 +34,7 @@ metaflow/task.py,sha256=ecGaULbK8kXPnyWzH1u6wtGclm0qeJm7K95amEL17sQ,25863
 metaflow/unbounded_foreach.py,sha256=p184WMbrMJ3xKYHwewj27ZhRUsSj_kw1jlye5gA9xJk,387
 metaflow/util.py,sha256=RrjsvADLKxSqjL76CxKh_J4OJl840B9Ak3V-vXleGas,13429
 metaflow/vendor.py,sha256=LZgXrh7ZSDmD32D1T5jj3OKKpXIqqxKzdMAOc5V0SD4,5162
-metaflow/version.py,sha256=
+metaflow/version.py,sha256=6yPUOjfvtEhta0qKpkpCuOl4wPQ8ItCvpFbYAALntvI,29
 metaflow/_vendor/__init__.py,sha256=y_CiwUD3l4eAKvTVDZeqgVujMy31cAM1qjAB-HfI-9s,353
 metaflow/_vendor/click/__init__.py,sha256=FkyGDQ-cbiQxP_lxgUspyFYS48f2S_pTcfKPz-d_RMo,2463
 metaflow/_vendor/click/_bashcomplete.py,sha256=9J98IHQYmCAr2Jup6TDshUr5FJEen-AoQCZR0K5nKxQ,12309
@@ -98,12 +98,12 @@ metaflow/cmd/develop/stub_generator.py,sha256=fmiWmr4tXBBvIZdWVEhKvZWtG4vjyIsfre
 metaflow/cmd/develop/stubs.py,sha256=hhf1giRNNlFGB5zSZdNA8tNvnJcmotXSiNN06N3_WyA,11742
 metaflow/datastore/__init__.py,sha256=VxP6ddJt3rwiCkpiSfAhyVkUCOe1pgZZsytVEJzFmSQ,155
 metaflow/datastore/content_addressed_store.py,sha256=dCVFAr4PltlmXNVVYt7UaBGJWe6fWuicCgb68XHqLrA,7643
-metaflow/datastore/datastore_set.py,sha256=
+metaflow/datastore/datastore_set.py,sha256=R5pwnxg1DD8kBY9vElvd2eMknrvwTyiSwvQs67_z9bc,2361
 metaflow/datastore/datastore_storage.py,sha256=7V43QuiWDQ_Q4oHw9y7Z7X9lYj3GI-LV1-xB3d2Tt5k,9038
 metaflow/datastore/exceptions.py,sha256=r7Ab5FvHIzyFh6kwiptA1lO5nLqWg0xRBoeYGefvapA,373
-metaflow/datastore/flow_datastore.py,sha256=
+metaflow/datastore/flow_datastore.py,sha256=kbJcOLYnvPHgJfZ_WWkD9LJSX1PHI1K6f9oVUu08A9U,10235
 metaflow/datastore/inputs.py,sha256=i43dXr2xvgtsgKMO9allgCR18bk80GeayeQFyUTH36w,449
-metaflow/datastore/task_datastore.py,sha256=
+metaflow/datastore/task_datastore.py,sha256=RWO-2p_vyJfTV9JtW2dIdt7IW7n_OD8ff43YJwetiW0,36169
 metaflow/extension_support/__init__.py,sha256=GK3P6YbIN4S7r3rbofzh4xaIJ6wsmDsE7iEMIlbXgMM,49334
 metaflow/extension_support/_empty_file.py,sha256=HENjnM4uAfeNygxMB_feCCWORFoSat9n_QwzSx2oXPw,109
 metaflow/extension_support/cmd.py,sha256=hk8iBUUINqvKCDxInKgWpum8ThiRZtHSJP7qBASHzl8,5711
@@ -118,12 +118,13 @@ metaflow/mflog/mflog.py,sha256=VebXxqitOtNAs7VJixnNfziO_i_urG7bsJ5JiB5IXgY,4370
 metaflow/mflog/save_logs.py,sha256=ZBAF4BMukw4FMAC7odpr9OI2BC_2petPtDX0ca6srC4,2352
 metaflow/mflog/save_logs_periodically.py,sha256=2Uvk9hi-zlCqXxOQoXmmjH1SCugfw6eG6w70WgfI-ho,1256
 metaflow/mflog/tee.py,sha256=wTER15qeHuiRpCkOqo-bd-r3Gj-EVlf3IvWRCA4beW4,887
-metaflow/plugins/__init__.py,sha256=
+metaflow/plugins/__init__.py,sha256=eHwQSfm9SkQUFdnT7eklMQZhytevWuxMStQJPDYMhNw,6280
 metaflow/plugins/catch_decorator.py,sha256=UOM2taN_OL2RPpuJhwEOA9ZALm0-hHD0XS2Hn2GUev0,4061
 metaflow/plugins/debug_logger.py,sha256=mcF5HYzJ0NQmqCMjyVUk3iAP-heroHRIiVWQC6Ha2-I,879
 metaflow/plugins/debug_monitor.py,sha256=Md5X_sDOSssN9pt2D8YcaIjTK5JaQD55UAYTcF6xYF0,1099
 metaflow/plugins/environment_decorator.py,sha256=6m9j2B77d-Ja_l_9CTJ__0O6aB2a8Qt_lAZu6UjAcUA,587
 metaflow/plugins/events_decorator.py,sha256=c2GcH6Mspbey3wBkjM5lqxaNByFOzYDQdllLpXzRNv8,18283
+metaflow/plugins/logs_cli.py,sha256=7-LxLRy8X5s4KcoMSc183cbNIJnIcedf6oWPe13-hQs,11409
 metaflow/plugins/package_cli.py,sha256=-J6D4cupHfWSZ4GEFo2yy9Je9oL3owRWm5pEJwaiqd4,1649
 metaflow/plugins/parallel_decorator.py,sha256=yrwfqdTmvEyR3YdvJiPChVOK5vjYTibC09kDs7t6_kg,4444
 metaflow/plugins/project_decorator.py,sha256=eJOe0Ea7CbUCReEhR_XQvRkhV6jyRqDxM72oZI7EMCk,5336
@@ -158,7 +159,7 @@ metaflow/plugins/aws/aws_client.py,sha256=mO8UD6pxFaOnxDb3hTP3HB7Gqb_ZxoR-76LT68
 metaflow/plugins/aws/aws_utils.py,sha256=mSMBTUQ-CELhyPb6w3_Yq6_Hvd_6vbhAojYkrt2RNt8,6941
 metaflow/plugins/aws/batch/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 metaflow/plugins/aws/batch/batch.py,sha256=e9ssahWM18GnipPK2sqYB-ztx9w7Eoo7YtWyEtufYxs,17787
-metaflow/plugins/aws/batch/batch_cli.py,sha256=
+metaflow/plugins/aws/batch/batch_cli.py,sha256=6PTbyajRgdy0XmjyJLBTdKdiOB84dcovQQ8sFXlJqko,11749
 metaflow/plugins/aws/batch/batch_client.py,sha256=s9ZHhxQPPoBQijLUgn6_16QOaD4-22U_44uJbp-yLkI,28565
 metaflow/plugins/aws/batch/batch_decorator.py,sha256=o2UH_E6k98ZpbWTvMooKv4V7nwpdHmh5SwCGzWubv0c,17313
 metaflow/plugins/aws/secrets_manager/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -213,7 +214,7 @@ metaflow/plugins/datastores/s3_storage.py,sha256=CZdNqaKtxDXQbEg2YHyphph3hWcLIE5
 metaflow/plugins/datatools/__init__.py,sha256=ge4L16OBQLy2J_MMvoHg3lMfdm-MluQgRWoyZ5GCRnk,1267
 metaflow/plugins/datatools/local.py,sha256=67hx3O_vInERlL0aJV0Sd-jUTd_2DOw4sJ4-IyEKNKM,4213
 metaflow/plugins/datatools/s3/__init__.py,sha256=14tr9fPjN3ULW5IOfKHeG7Uhjmgm7LMtQHfz1SFv-h8,248
-metaflow/plugins/datatools/s3/s3.py,sha256=
+metaflow/plugins/datatools/s3/s3.py,sha256=CQynofOk0l_sJMakTDhb8IlVKqlL-Ko1fmY5mKJTbes,66113
 metaflow/plugins/datatools/s3/s3op.py,sha256=ZQFSxlaQUt-Ko_kIXMbHOKJc8q4FPXogS3xI6xsDR7Y,43390
 metaflow/plugins/datatools/s3/s3tail.py,sha256=boQjQGQMI-bvTqcMP2y7uSlSYLcvWOy7J3ZUaF78NAA,2597
 metaflow/plugins/datatools/s3/s3util.py,sha256=FgRgaVmEq7-i2dV7q8XK5w5PfFt-xJjZa8WrK8IJfdI,3769
@@ -298,9 +299,9 @@ metaflow/tutorials/07-worldview/README.md,sha256=5vQTrFqulJ7rWN6r20dhot9lI2sVj9W
 metaflow/tutorials/07-worldview/worldview.ipynb,sha256=ztPZPI9BXxvW1QdS2Tfe7LBuVzvFvv0AToDnsDJhLdE,2237
 metaflow/tutorials/08-autopilot/README.md,sha256=GnePFp_q76jPs991lMUqfIIh5zSorIeWznyiUxzeUVE,1039
 metaflow/tutorials/08-autopilot/autopilot.ipynb,sha256=DQoJlILV7Mq9vfPBGW-QV_kNhWPjS5n6SJLqePjFYLY,3191
-metaflow-2.11.
-metaflow-2.11.
-metaflow-2.11.
-metaflow-2.11.
-metaflow-2.11.
-metaflow-2.11.
+metaflow-2.11.15.dist-info/LICENSE,sha256=nl_Lt5v9VvJ-5lWJDT4ddKAG-VZ-2IaLmbzpgYDz2hU,11343
+metaflow-2.11.15.dist-info/METADATA,sha256=QwGD7fah-tge1xhdVxg9WlsgtsvpC79HfP24Nzx-KEA,5908
+metaflow-2.11.15.dist-info/WHEEL,sha256=DZajD4pwLWue70CAfc7YaxT1wLUciNBvN_TTcvXpltE,110
+metaflow-2.11.15.dist-info/entry_points.txt,sha256=IKwTN1T3I5eJL3uo_vnkyxVffcgnRdFbKwlghZfn27k,57
+metaflow-2.11.15.dist-info/top_level.txt,sha256=v1pDHoWaSaKeuc5fKTRSfsXCKSdW1zvNVmvA-i0if3o,9
+metaflow-2.11.15.dist-info/RECORD,,
{metaflow-2.11.14.dist-info → metaflow-2.11.15.dist-info}/LICENSE
File without changes
{metaflow-2.11.14.dist-info → metaflow-2.11.15.dist-info}/WHEEL
File without changes
{metaflow-2.11.14.dist-info → metaflow-2.11.15.dist-info}/entry_points.txt
File without changes
{metaflow-2.11.14.dist-info → metaflow-2.11.15.dist-info}/top_level.txt
File without changes