metaflow 2.18.12__py2.py3-none-any.whl → 2.19.0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaflow/__init__.py +1 -0
- metaflow/cli.py +78 -13
- metaflow/cli_components/run_cmds.py +182 -39
- metaflow/cli_components/step_cmd.py +160 -4
- metaflow/client/__init__.py +1 -0
- metaflow/client/core.py +162 -99
- metaflow/client/filecache.py +59 -32
- metaflow/cmd/code/__init__.py +2 -1
- metaflow/datastore/__init__.py +1 -0
- metaflow/datastore/content_addressed_store.py +40 -9
- metaflow/datastore/datastore_set.py +10 -1
- metaflow/datastore/flow_datastore.py +123 -4
- metaflow/datastore/spin_datastore.py +91 -0
- metaflow/datastore/task_datastore.py +86 -2
- metaflow/decorators.py +75 -6
- metaflow/extension_support/__init__.py +372 -305
- metaflow/flowspec.py +3 -2
- metaflow/graph.py +2 -2
- metaflow/metaflow_config.py +41 -0
- metaflow/metaflow_profile.py +18 -0
- metaflow/packaging_sys/utils.py +2 -39
- metaflow/packaging_sys/v1.py +63 -16
- metaflow/plugins/__init__.py +2 -0
- metaflow/plugins/argo/argo_workflows.py +20 -25
- metaflow/plugins/argo/param_val.py +19 -0
- metaflow/plugins/cards/card_datastore.py +13 -13
- metaflow/plugins/cards/card_decorator.py +1 -0
- metaflow/plugins/cards/card_modules/basic.py +9 -3
- metaflow/plugins/datastores/local_storage.py +12 -6
- metaflow/plugins/datastores/spin_storage.py +12 -0
- metaflow/plugins/datatools/s3/s3.py +29 -10
- metaflow/plugins/datatools/s3/s3op.py +90 -62
- metaflow/plugins/metadata_providers/local.py +76 -82
- metaflow/plugins/metadata_providers/spin.py +16 -0
- metaflow/runner/click_api.py +4 -2
- metaflow/runner/metaflow_runner.py +210 -19
- metaflow/runtime.py +348 -21
- metaflow/task.py +61 -12
- metaflow/user_configs/config_parameters.py +2 -4
- metaflow/user_decorators/mutable_flow.py +1 -1
- metaflow/user_decorators/user_step_decorator.py +10 -1
- metaflow/util.py +191 -1
- metaflow/version.py +1 -1
- {metaflow-2.18.12.data → metaflow-2.19.0.data}/data/share/metaflow/devtools/Makefile +10 -0
- {metaflow-2.18.12.dist-info → metaflow-2.19.0.dist-info}/METADATA +2 -4
- {metaflow-2.18.12.dist-info → metaflow-2.19.0.dist-info}/RECORD +52 -48
- {metaflow-2.18.12.data → metaflow-2.19.0.data}/data/share/metaflow/devtools/Tiltfile +0 -0
- {metaflow-2.18.12.data → metaflow-2.19.0.data}/data/share/metaflow/devtools/pick_services.sh +0 -0
- {metaflow-2.18.12.dist-info → metaflow-2.19.0.dist-info}/WHEEL +0 -0
- {metaflow-2.18.12.dist-info → metaflow-2.19.0.dist-info}/entry_points.txt +0 -0
- {metaflow-2.18.12.dist-info → metaflow-2.19.0.dist-info}/licenses/LICENSE +0 -0
- {metaflow-2.18.12.dist-info → metaflow-2.19.0.dist-info}/top_level.txt +0 -0
metaflow/runtime.py
CHANGED
|
@@ -26,20 +26,29 @@ from metaflow.datastore.exceptions import DataException
|
|
|
26
26
|
from contextlib import contextmanager
|
|
27
27
|
|
|
28
28
|
from . import get_namespace
|
|
29
|
+
from .client.filecache import FileCache, FileBlobCache, TaskMetadataCache
|
|
29
30
|
from .metadata_provider import MetaDatum
|
|
30
|
-
from .metaflow_config import
|
|
31
|
+
from .metaflow_config import (
|
|
32
|
+
FEAT_ALWAYS_UPLOAD_CODE_PACKAGE,
|
|
33
|
+
MAX_ATTEMPTS,
|
|
34
|
+
UI_URL,
|
|
35
|
+
SPIN_ALLOWED_DECORATORS,
|
|
36
|
+
SPIN_DISALLOWED_DECORATORS,
|
|
37
|
+
)
|
|
38
|
+
from .metaflow_profile import from_start
|
|
39
|
+
from .plugins import DATASTORES
|
|
31
40
|
from .exception import (
|
|
32
41
|
MetaflowException,
|
|
33
42
|
MetaflowInternalError,
|
|
34
43
|
METAFLOW_EXIT_DISALLOW_RETRY,
|
|
35
44
|
)
|
|
36
45
|
from . import procpoll
|
|
37
|
-
from .datastore import TaskDataStoreSet
|
|
46
|
+
from .datastore import FlowDataStore, TaskDataStoreSet
|
|
38
47
|
from .debug import debug
|
|
39
48
|
from .decorators import flow_decorators
|
|
40
49
|
from .flowspec import _FlowState
|
|
41
50
|
from .mflog import mflog, RUNTIME_LOG_SOURCE
|
|
42
|
-
from .util import to_unicode, compress_list, unicode_type
|
|
51
|
+
from .util import to_unicode, compress_list, unicode_type, get_latest_task_pathspec
|
|
43
52
|
from .clone_util import clone_task_helper
|
|
44
53
|
from .unbounded_foreach import (
|
|
45
54
|
CONTROL_TASK_TAG,
|
|
@@ -85,6 +94,253 @@ mflog_msg = partial(mflog.decorate, RUNTIME_LOG_SOURCE)
|
|
|
85
94
|
# TODO option: output dot graph periodically about execution
|
|
86
95
|
|
|
87
96
|
|
|
97
|
+
class SpinRuntime(object):
    """
    Runtime that re-executes ("spins") a single step of a flow.

    The step is launched in a worker process under a newly allocated run id.
    Its inputs are taken from a previously executed task of the same step
    (the "spun" task); artifacts of that task are accessed through a
    FlowDataStore built from the datastore type and root recorded in the
    spun task's metadata.
    """

    def __init__(
        self,
        flow,
        graph,
        flow_datastore,
        metadata,
        environment,
        package,
        logger,
        entrypoint,
        event_logger,
        monitor,
        step_func,
        step_name,
        spin_pathspec,
        skip_decorators=False,
        artifacts_module=None,
        persist=True,
        max_log_size=MAX_LOG_SIZE,
    ):
        """
        Resolve the task to spin and prepare access to its original datastore.

        Parameters
        ----------
        flow, graph, flow_datastore, metadata, environment, package, logger,
        entrypoint, event_logger, monitor
            Runtime collaborators; stored and forwarded to the task that is
            created in `execute` and to the decorators' `runtime_init`.
        step_func
            Step function to spin; its `name` and `decorators` are used.
        step_name
            Name of the step, used to look up the latest task when
            `spin_pathspec` does not identify a task.
        spin_pathspec
            Either `flow/run/step/task_id`, `flow/run/step` (only the run id
            is used; the latest task of `step_name` in that run is picked) or
            a falsy value (the latest task of `step_name` is picked).
        skip_decorators
            When true, no decorator is initialized or passed to the task.
        artifacts_module, persist
            Forwarded unchanged to the worker.
        max_log_size
            Forwarded unchanged to the worker.

        Raises
        ------
        MetaflowException
            If the pathspec is malformed or cannot be resolved, if the
            datastore type recorded for the spun task is not available, or if
            the step carries a decorator listed in SPIN_DISALLOWED_DECORATORS.
        """
        # Client-side Task object, imported locally. NOTE(review): `_new_task`
        # below uses the module-level `Task` name instead -- presumably the
        # runtime task class defined elsewhere in this file; confirm.
        from metaflow import Task

        self._flow = flow
        self._graph = graph
        self._flow_datastore = flow_datastore
        self._metadata = metadata
        self._environment = environment
        self._package = package
        self._logger = logger
        self._entrypoint = entrypoint
        self._event_logger = event_logger
        self._monitor = monitor

        self._step_func = step_func

        # Determine if we have a complete pathspec or need to get the task
        if spin_pathspec:
            parts = spin_pathspec.split("/")
            if len(parts) == 4:
                # Complete pathspec: flow/run/step/task_id
                try:
                    # If user provides whole pathspec, we do not need to check namespace
                    task = Task(spin_pathspec, _namespace_check=False)
                except Exception as e:
                    # Chain the original error so the root cause stays visible.
                    raise MetaflowException(
                        f"Invalid pathspec: {spin_pathspec} for step: {step_name}"
                    ) from e
            elif len(parts) == 3:
                # Partial pathspec: flow/run/step - need to get the task
                _, run_id, _ = parts
                task = get_latest_task_pathspec(flow.name, step_name, run_id=run_id)
                logger(
                    f"To make spin even faster, provide complete pathspec with task_id: {task.pathspec}",
                    system_msg=True,
                )
            else:
                raise MetaflowException(
                    f"Invalid pathspec format: {spin_pathspec}. Expected flow/run/step or flow/run/step/task_id"
                )
        else:
            # No pathspec provided, get latest task for this step
            task = get_latest_task_pathspec(flow.name, step_name)
            logger(
                f"To make spin even faster, provide complete pathspec {task.pathspec}",
                system_msg=True,
            )
        from_start("SpinRuntime: after getting task")

        # Get the original FlowDatastore so we can use it to access artifacts from the
        # spun task
        meta_dict = task.metadata_dict
        ds_type = meta_dict["ds-type"]
        ds_root = meta_dict["ds-root"]
        # First registered datastore implementation matching the recorded type.
        orig_datastore_impl = next(
            (d for d in DATASTORES if d.TYPE == ds_type), None
        )
        if orig_datastore_impl is None:
            raise MetaflowException(
                f"Datastore type '{ds_type}' recorded for task {task.pathspec} "
                "is not available."
            )
        # NOTE: this assigns the root on the implementation object taken from
        # DATASTORES itself, so the change is visible to other users of it.
        orig_datastore_impl.datastore_root = ds_root
        spin_pathspec = task.pathspec
        orig_flow_datastore = FlowDataStore(
            flow.name,
            environment=None,
            storage_impl=orig_datastore_impl,
            ds_root=ds_root,
        )

        self._filecache = FileCache()
        orig_flow_datastore.set_metadata_cache(
            TaskMetadataCache(self._filecache, ds_type, ds_root, flow.name)
        )
        orig_flow_datastore.ca_store.set_blob_cache(
            FileBlobCache(
                self._filecache, FileCache.flow_ds_id(ds_type, ds_root, flow.name)
            )
        )

        self._orig_flow_datastore = orig_flow_datastore
        self._spin_pathspec = spin_pathspec
        self._persist = persist
        self._spin_task = task
        # Lazily computed by the properties below; None means "not computed".
        self._input_paths = None
        self._split_index = None
        self._whitelist_decorators = None
        self._config_file_name = None
        self._skip_decorators = skip_decorators
        self._artifacts_module = artifacts_module
        self._max_log_size = max_log_size
        self._encoding = sys.stdout.encoding or "UTF-8"

        # Create a new run_id for the spin task
        self.run_id = self._metadata.new_run_id()
        # Raise exception if we have a black listed decorator
        for deco in self._step_func.decorators:
            if deco.name in SPIN_DISALLOWED_DECORATORS:
                raise MetaflowException(
                    f"Spinning steps with @{deco.name} decorator is not supported."
                )

        for deco in self.whitelist_decorators:
            deco.runtime_init(flow, graph, package, self.run_id)
        from_start("SpinRuntime: after init decorators")

    @property
    def split_index(self):
        """
        Return the `index` attribute of the spun task, or None if it has none.

        A non-None value is cached after the first access; a None result is
        looked up again on each access.
        """
        if self._split_index is None:
            self._split_index = getattr(self._spin_task, "index", None)

        return self._split_index

    @property
    def input_paths(self):
        """
        Return the input paths for the spun step as
        `run_id/step_name/task_id/attempt` strings.

        For the `start` step the single input is the task of the
        `_parameters` step of the spun run; for any other step there is one
        entry per parent task of the spun task. The list is computed once and
        cached, including when it is empty.
        """

        def _format_input_paths(task_pathspec, attempt):
            # Drop the flow name and append the attempt number.
            _, run_id, step_name, task_id = task_pathspec.split("/")
            return f"{run_id}/{step_name}/{task_id}/{attempt}"

        # Compare against None (not truthiness) so that an empty list is
        # cached too instead of repeating the parent task lookup.
        if self._input_paths is not None:
            return self._input_paths

        if self._step_func.name == "start":
            from metaflow import Step

            flow_name, run_id, _, _ = self._spin_pathspec.split("/")
            task = Step(
                f"{flow_name}/{run_id}/_parameters", _namespace_check=False
            ).task
            self._input_paths = [
                _format_input_paths(task.pathspec, task.current_attempt)
            ]
        else:
            parent_tasks = self._spin_task.parent_tasks
            self._input_paths = [
                _format_input_paths(t.pathspec, t.current_attempt) for t in parent_tasks
            ]
        return self._input_paths

    @property
    def whitelist_decorators(self):
        """
        Return the decorators of the step that are kept while spinning.

        Empty when `skip_decorators` was requested; otherwise the decorators
        whose name starts with one of the SPIN_ALLOWED_DECORATORS prefixes.
        The list is computed once and cached, including when it is empty.
        """
        if self._skip_decorators:
            self._whitelist_decorators = []
            return self._whitelist_decorators
        # Compare against None (not truthiness) so an empty result is cached.
        if self._whitelist_decorators is not None:
            return self._whitelist_decorators
        self._whitelist_decorators = [
            deco
            for deco in self._step_func.decorators
            if any(deco.name.startswith(prefix) for prefix in SPIN_ALLOWED_DECORATORS)
        ]
        return self._whitelist_decorators

    def _new_task(self, step, input_paths=None, **kwargs):
        """Create the task object for `step` under this runtime's run id."""
        return Task(
            flow_datastore=self._flow_datastore,
            flow=self._flow,
            step=step,
            run_id=self.run_id,
            metadata=self._metadata,
            environment=self._environment,
            entrypoint=self._entrypoint,
            event_logger=self._event_logger,
            monitor=self._monitor,
            input_paths=input_paths,
            decos=self.whitelist_decorators,
            logger=self._logger,
            split_index=self.split_index,
            **kwargs,
        )

    def execute(self):
        """
        Run the spun step and wait for it to finish.

        Config values of the flow, if any, are dumped as JSON to a temporary
        file whose name is handed to the worker. `runtime_finished` is called
        on every kept decorator whether or not the task succeeded, receiving
        the exception on failure (None otherwise). Exceptions are re-raised.
        """
        exception = None
        # The worker is launched inside this block because the temporary file
        # is removed as soon as the context manager exits.
        with tempfile.NamedTemporaryFile(mode="w", encoding="utf-8") as config_file:
            config_value = dump_config_values(self._flow)
            if config_value:
                json.dump(config_value, config_file)
                config_file.flush()
                self._config_file_name = config_file.name
            else:
                self._config_file_name = None
            from_start("SpinRuntime: config values processed")
            self.task = self._new_task(self._step_func.name, self.input_paths)
            try:
                self._launch_and_monitor_task()
            except Exception as ex:
                self._logger("Task failed.", system_msg=True, bad=True)
                exception = ex
                raise
            finally:
                for deco in self.whitelist_decorators:
                    deco.runtime_finished(exception)

    def _launch_and_monitor_task(self):
        """
        Start a worker for `self.task`, relay its log lines until all of its
        file descriptors are terminated, then check its return code.

        Raises
        ------
        TaskFailed
            If the worker's return code is non-zero.
        """
        worker = Worker(
            self.task,
            self._max_log_size,
            self._config_file_name,
            orig_flow_datastore=self._orig_flow_datastore,
            spin_pathspec=self._spin_pathspec,
            artifacts_module=self._artifacts_module,
            persist=self._persist,
            skip_decorators=self._skip_decorators,
        )
        from_start("SpinRuntime: created worker")

        poll = procpoll.make_poll()
        fds = worker.fds()
        for fd in fds:
            poll.add(fd)

        active_fds = set(fds)

        while active_fds:
            events = poll.poll(POLL_TIMEOUT)
            for event in events:
                if event.can_read:
                    worker.read_logline(event.fd)
                if event.is_terminated:
                    poll.remove(event.fd)
                    active_fds.remove(event.fd)
        from_start("SpinRuntime: read loglines")
        returncode = worker.terminate()
        from_start("SpinRuntime: worker terminated")
        if returncode != 0:
            raise TaskFailed(self.task, f"Task failed with return code {returncode}")
        else:
            self._logger("Task finished successfully.", system_msg=True)
|
|
342
|
+
|
|
343
|
+
|
|
88
344
|
class NativeRuntime(object):
|
|
89
345
|
def __init__(
|
|
90
346
|
self,
|
|
@@ -1769,8 +2025,27 @@ class CLIArgs(object):
|
|
|
1769
2025
|
for step execution in StepDecorator.runtime_step_cli().
|
|
1770
2026
|
"""
|
|
1771
2027
|
|
|
1772
|
-
def __init__(
|
|
2028
|
+
def __init__(
|
|
2029
|
+
self,
|
|
2030
|
+
task,
|
|
2031
|
+
orig_flow_datastore=None,
|
|
2032
|
+
spin_pathspec=None,
|
|
2033
|
+
artifacts_module=None,
|
|
2034
|
+
persist=True,
|
|
2035
|
+
skip_decorators=False,
|
|
2036
|
+
):
|
|
1773
2037
|
self.task = task
|
|
2038
|
+
if orig_flow_datastore is not None:
|
|
2039
|
+
self.orig_flow_datastore = "%s@%s" % (
|
|
2040
|
+
orig_flow_datastore.TYPE,
|
|
2041
|
+
orig_flow_datastore.datastore_root,
|
|
2042
|
+
)
|
|
2043
|
+
else:
|
|
2044
|
+
self.orig_flow_datastore = None
|
|
2045
|
+
self.spin_pathspec = spin_pathspec
|
|
2046
|
+
self.artifacts_module = artifacts_module
|
|
2047
|
+
self.persist = persist
|
|
2048
|
+
self.skip_decorators = skip_decorators
|
|
1774
2049
|
self.entrypoint = list(task.entrypoint)
|
|
1775
2050
|
step_obj = getattr(self.task.flow, self.task.step)
|
|
1776
2051
|
self.top_level_options = {
|
|
@@ -1808,21 +2083,49 @@ class CLIArgs(object):
|
|
|
1808
2083
|
(k, ConfigInput.make_key_name(k)) for k in configs
|
|
1809
2084
|
]
|
|
1810
2085
|
|
|
2086
|
+
if spin_pathspec:
|
|
2087
|
+
self.spin_args()
|
|
2088
|
+
else:
|
|
2089
|
+
self.default_args()
|
|
2090
|
+
|
|
2091
|
+
def default_args(self):
    """
    Populate the command, its arguments and its options for a regular
    `step` invocation of `self.task`, and reset `self.env` to an empty dict.
    """
    task = self.task
    self.commands = ["step"]
    self.command_args = [task.step]
    options = {}
    options["run-id"] = task.run_id
    options["task-id"] = task.task_id
    options["input-paths"] = compress_list(task.input_paths)
    options["split-index"] = task.split_index
    options["retry-count"] = task.retries
    options["max-user-code-retries"] = task.user_code_retries
    options["tag"] = task.tags
    # An unset namespace is passed as an empty string rather than None.
    options["namespace"] = get_namespace() or ""
    options["ubf-context"] = task.ubf_context
    self.command_options = options
    self.env = {}
|
|
1825
2106
|
|
|
2107
|
+
def spin_args(self):
    """
    Populate the command, its arguments and its options for a `spin-step`
    invocation of `self.task`, and reset `self.env` to an empty dict.

    Exactly one of the `persist` / `no-persist` options is set, depending on
    `self.persist`.
    """
    task = self.task
    self.commands = ["spin-step"]
    self.command_args = [task.step]

    options = {
        "run-id": task.run_id,
        "task-id": task.task_id,
        "input-paths": compress_list(task.input_paths),
        "split-index": task.split_index,
        "retry-count": task.retries,
        "max-user-code-retries": task.user_code_retries,
        # An unset namespace is passed as an empty string rather than None.
        "namespace": get_namespace() or "",
        "orig-flow-datastore": self.orig_flow_datastore,
        "artifacts-module": self.artifacts_module,
        "skip-decorators": self.skip_decorators,
    }
    persist_flag = "persist" if self.persist else "no-persist"
    options[persist_flag] = True
    self.command_options = options
    self.env = {}
|
|
2128
|
+
|
|
1826
2129
|
def get_args(self):
|
|
1827
2130
|
# TODO: Make one with dict_to_cli_options; see cli_args.py for more detail
|
|
1828
2131
|
def _options(mapping):
|
|
@@ -1861,9 +2164,24 @@ class CLIArgs(object):
|
|
|
1861
2164
|
|
|
1862
2165
|
|
|
1863
2166
|
class Worker(object):
|
|
1864
|
-
def __init__(
|
|
2167
|
+
def __init__(
|
|
2168
|
+
self,
|
|
2169
|
+
task,
|
|
2170
|
+
max_logs_size,
|
|
2171
|
+
config_file_name,
|
|
2172
|
+
orig_flow_datastore=None,
|
|
2173
|
+
spin_pathspec=None,
|
|
2174
|
+
artifacts_module=None,
|
|
2175
|
+
persist=True,
|
|
2176
|
+
skip_decorators=False,
|
|
2177
|
+
):
|
|
1865
2178
|
self.task = task
|
|
1866
2179
|
self._config_file_name = config_file_name
|
|
2180
|
+
self._orig_flow_datastore = orig_flow_datastore
|
|
2181
|
+
self._spin_pathspec = spin_pathspec
|
|
2182
|
+
self._artifacts_module = artifacts_module
|
|
2183
|
+
self._skip_decorators = skip_decorators
|
|
2184
|
+
self._persist = persist
|
|
1867
2185
|
self._proc = self._launch()
|
|
1868
2186
|
|
|
1869
2187
|
if task.retries > task.user_code_retries:
|
|
@@ -1895,7 +2213,14 @@ class Worker(object):
|
|
|
1895
2213
|
# not it is properly shut down)
|
|
1896
2214
|
|
|
1897
2215
|
def _launch(self):
|
|
1898
|
-
args = CLIArgs(
|
|
2216
|
+
args = CLIArgs(
|
|
2217
|
+
self.task,
|
|
2218
|
+
orig_flow_datastore=self._orig_flow_datastore,
|
|
2219
|
+
spin_pathspec=self._spin_pathspec,
|
|
2220
|
+
artifacts_module=self._artifacts_module,
|
|
2221
|
+
persist=self._persist,
|
|
2222
|
+
skip_decorators=self._skip_decorators,
|
|
2223
|
+
)
|
|
1899
2224
|
env = dict(os.environ)
|
|
1900
2225
|
|
|
1901
2226
|
if self.task.clone_run_id:
|
|
@@ -1928,6 +2253,7 @@ class Worker(object):
|
|
|
1928
2253
|
# by read_logline() below that relies on readline() not blocking
|
|
1929
2254
|
# print('running', args)
|
|
1930
2255
|
cmdline = args.get_args()
|
|
2256
|
+
from_start(f"Command line: {' '.join(cmdline)}")
|
|
1931
2257
|
debug.subcommand_exec(cmdline)
|
|
1932
2258
|
return subprocess.Popen(
|
|
1933
2259
|
cmdline,
|
|
@@ -2050,13 +2376,14 @@ class Worker(object):
|
|
|
2050
2376
|
else:
|
|
2051
2377
|
self.emit_log(b"Task failed.", self._stderr, system_msg=True)
|
|
2052
2378
|
else:
|
|
2053
|
-
|
|
2054
|
-
|
|
2055
|
-
|
|
2056
|
-
|
|
2057
|
-
|
|
2058
|
-
|
|
2059
|
-
|
|
2379
|
+
if not self._spin_pathspec:
|
|
2380
|
+
num = self.task.results["_foreach_num_splits"]
|
|
2381
|
+
if num:
|
|
2382
|
+
self.task.log(
|
|
2383
|
+
"Foreach yields %d child steps." % num,
|
|
2384
|
+
system_msg=True,
|
|
2385
|
+
pid=self._proc.pid,
|
|
2386
|
+
)
|
|
2060
2387
|
self.task.log(
|
|
2061
2388
|
"Task finished successfully.", system_msg=True, pid=self._proc.pid
|
|
2062
2389
|
)
|
metaflow/task.py
CHANGED
|
@@ -6,14 +6,15 @@ import os
|
|
|
6
6
|
import time
|
|
7
7
|
import traceback
|
|
8
8
|
|
|
9
|
-
|
|
10
9
|
from types import MethodType, FunctionType
|
|
11
10
|
|
|
12
11
|
from metaflow.sidecar import Message, MessageTypes
|
|
13
12
|
from metaflow.datastore.exceptions import DataException
|
|
14
13
|
|
|
14
|
+
from metaflow.plugins import METADATA_PROVIDERS
|
|
15
15
|
from .metaflow_config import MAX_ATTEMPTS
|
|
16
16
|
from .metadata_provider import MetaDatum
|
|
17
|
+
from .metaflow_profile import from_start
|
|
17
18
|
from .mflog import TASK_LOG_SOURCE
|
|
18
19
|
from .datastore import Inputs, TaskDataStoreSet
|
|
19
20
|
from .exception import (
|
|
@@ -49,6 +50,8 @@ class MetaflowTask(object):
|
|
|
49
50
|
event_logger,
|
|
50
51
|
monitor,
|
|
51
52
|
ubf_context,
|
|
53
|
+
orig_flow_datastore=None,
|
|
54
|
+
spin_artifacts=None,
|
|
52
55
|
):
|
|
53
56
|
self.flow = flow
|
|
54
57
|
self.flow_datastore = flow_datastore
|
|
@@ -58,6 +61,8 @@ class MetaflowTask(object):
|
|
|
58
61
|
self.event_logger = event_logger
|
|
59
62
|
self.monitor = monitor
|
|
60
63
|
self.ubf_context = ubf_context
|
|
64
|
+
self.orig_flow_datastore = orig_flow_datastore
|
|
65
|
+
self.spin_artifacts = spin_artifacts
|
|
61
66
|
|
|
62
67
|
def _exec_step_function(self, step_function, orig_step_func, input_obj=None):
|
|
63
68
|
wrappers_stack = []
|
|
@@ -150,6 +155,7 @@ class MetaflowTask(object):
|
|
|
150
155
|
graph_node = self.flow._graph[orig_step_func.name]
|
|
151
156
|
out_funcs = [getattr(self.flow, f) for f in graph_node.out_funcs]
|
|
152
157
|
if out_funcs:
|
|
158
|
+
self.flow._transition = None
|
|
153
159
|
if isinstance(fake_next_call_args, dict) and fake_next_call_args:
|
|
154
160
|
# Not an empty dictionary -- we use this as arguments for the next
|
|
155
161
|
# call
|
|
@@ -233,7 +239,6 @@ class MetaflowTask(object):
|
|
|
233
239
|
lambda _, parameter_ds=parameter_ds: parameter_ds["_graph_info"],
|
|
234
240
|
)
|
|
235
241
|
all_vars.append("_graph_info")
|
|
236
|
-
|
|
237
242
|
if passdown:
|
|
238
243
|
self.flow._datastore.passdown_partial(parameter_ds, all_vars)
|
|
239
244
|
return param_only_vars
|
|
@@ -261,6 +266,9 @@ class MetaflowTask(object):
|
|
|
261
266
|
run_id,
|
|
262
267
|
pathspecs=input_paths,
|
|
263
268
|
prefetch_data_artifacts=prefetch_data_artifacts,
|
|
269
|
+
join_type=join_type,
|
|
270
|
+
orig_flow_datastore=self.orig_flow_datastore,
|
|
271
|
+
spin_artifacts=self.spin_artifacts,
|
|
264
272
|
)
|
|
265
273
|
ds_list = [ds for ds in datastore_set]
|
|
266
274
|
if len(ds_list) != len(input_paths):
|
|
@@ -272,10 +280,27 @@ class MetaflowTask(object):
|
|
|
272
280
|
# initialize directly in the single input case.
|
|
273
281
|
ds_list = []
|
|
274
282
|
for input_path in input_paths:
|
|
275
|
-
|
|
283
|
+
parts = input_path.split("/")
|
|
284
|
+
if len(parts) == 3:
|
|
285
|
+
run_id, step_name, task_id = parts
|
|
286
|
+
attempt = None
|
|
287
|
+
else:
|
|
288
|
+
run_id, step_name, task_id, attempt = parts
|
|
289
|
+
attempt = int(attempt)
|
|
290
|
+
|
|
276
291
|
ds_list.append(
|
|
277
|
-
self.flow_datastore.get_task_datastore(
|
|
292
|
+
self.flow_datastore.get_task_datastore(
|
|
293
|
+
run_id,
|
|
294
|
+
step_name,
|
|
295
|
+
task_id,
|
|
296
|
+
attempt=attempt,
|
|
297
|
+
join_type=join_type,
|
|
298
|
+
orig_flow_datastore=self.orig_flow_datastore,
|
|
299
|
+
spin_artifacts=self.spin_artifacts,
|
|
300
|
+
)
|
|
278
301
|
)
|
|
302
|
+
from_start("MetaflowTask: got datastore for input path %s" % input_path)
|
|
303
|
+
|
|
279
304
|
if not ds_list:
|
|
280
305
|
# this guards against errors in input paths
|
|
281
306
|
raise MetaflowDataMissing(
|
|
@@ -546,6 +571,8 @@ class MetaflowTask(object):
|
|
|
546
571
|
split_index,
|
|
547
572
|
retry_count,
|
|
548
573
|
max_user_code_retries,
|
|
574
|
+
whitelist_decorators=None,
|
|
575
|
+
persist=True,
|
|
549
576
|
):
|
|
550
577
|
if run_id and task_id:
|
|
551
578
|
self.metadata.register_run_id(run_id)
|
|
@@ -604,7 +631,14 @@ class MetaflowTask(object):
|
|
|
604
631
|
|
|
605
632
|
step_func = getattr(self.flow, step_name)
|
|
606
633
|
decorators = step_func.decorators
|
|
607
|
-
|
|
634
|
+
if self.orig_flow_datastore:
|
|
635
|
+
# We filter only the whitelisted decorators in case of spin step.
|
|
636
|
+
decorators = (
|
|
637
|
+
[]
|
|
638
|
+
if not whitelist_decorators
|
|
639
|
+
else [deco for deco in decorators if deco.name in whitelist_decorators]
|
|
640
|
+
)
|
|
641
|
+
from_start("MetaflowTask: decorators initialized")
|
|
608
642
|
node = self.flow._graph[step_name]
|
|
609
643
|
join_type = None
|
|
610
644
|
if node.type == "join":
|
|
@@ -612,17 +646,20 @@ class MetaflowTask(object):
|
|
|
612
646
|
|
|
613
647
|
# 1. initialize output datastore
|
|
614
648
|
output = self.flow_datastore.get_task_datastore(
|
|
615
|
-
run_id, step_name, task_id, attempt=retry_count, mode="w"
|
|
649
|
+
run_id, step_name, task_id, attempt=retry_count, mode="w", persist=persist
|
|
616
650
|
)
|
|
617
651
|
|
|
618
652
|
output.init_task()
|
|
653
|
+
from_start("MetaflowTask: output datastore initialized")
|
|
619
654
|
|
|
620
655
|
if input_paths:
|
|
621
656
|
# 2. initialize input datastores
|
|
622
657
|
inputs = self._init_data(run_id, join_type, input_paths)
|
|
658
|
+
from_start("MetaflowTask: input datastores initialized")
|
|
623
659
|
|
|
624
660
|
# 3. initialize foreach state
|
|
625
661
|
self._init_foreach(step_name, join_type, inputs, split_index)
|
|
662
|
+
from_start("MetaflowTask: foreach state initialized")
|
|
626
663
|
|
|
627
664
|
# 4. initialize the iteration state
|
|
628
665
|
is_recursive_step = (
|
|
@@ -681,7 +718,7 @@ class MetaflowTask(object):
|
|
|
681
718
|
),
|
|
682
719
|
]
|
|
683
720
|
)
|
|
684
|
-
|
|
721
|
+
from_start("MetaflowTask: finished input processing")
|
|
685
722
|
self.metadata.register_metadata(
|
|
686
723
|
run_id,
|
|
687
724
|
step_name,
|
|
@@ -735,8 +772,11 @@ class MetaflowTask(object):
|
|
|
735
772
|
"project_flow_name": current.get("project_flow_name"),
|
|
736
773
|
"trace_id": trace_id or None,
|
|
737
774
|
}
|
|
775
|
+
|
|
776
|
+
from_start("MetaflowTask: task metadata initialized")
|
|
738
777
|
start = time.time()
|
|
739
778
|
self.metadata.start_task_heartbeat(self.flow.name, run_id, step_name, task_id)
|
|
779
|
+
from_start("MetaflowTask: heartbeat started")
|
|
740
780
|
with self.monitor.measure("metaflow.task.duration"):
|
|
741
781
|
try:
|
|
742
782
|
with self.monitor.count("metaflow.task.start"):
|
|
@@ -756,7 +796,6 @@ class MetaflowTask(object):
|
|
|
756
796
|
# should either be set prior to running the user code or listed in
|
|
757
797
|
# FlowSpec._EPHEMERAL to allow for proper merging/importing of
|
|
758
798
|
# user artifacts in the user's step code.
|
|
759
|
-
|
|
760
799
|
if join_type:
|
|
761
800
|
# Join step:
|
|
762
801
|
|
|
@@ -815,11 +854,19 @@ class MetaflowTask(object):
|
|
|
815
854
|
"graph_info": self.flow._graph_info,
|
|
816
855
|
}
|
|
817
856
|
)
|
|
857
|
+
from_start("MetaflowTask: before pre-step decorators")
|
|
818
858
|
for deco in decorators:
|
|
859
|
+
if deco.name == "card" and self.orig_flow_datastore:
|
|
860
|
+
# if spin step and card decorator, pass spin metadata
|
|
861
|
+
metadata = [m for m in METADATA_PROVIDERS if m.TYPE == "spin"][
|
|
862
|
+
0
|
|
863
|
+
](self.environment, self.flow, self.event_logger, self.monitor)
|
|
864
|
+
else:
|
|
865
|
+
metadata = self.metadata
|
|
819
866
|
deco.task_pre_step(
|
|
820
867
|
step_name,
|
|
821
868
|
output,
|
|
822
|
-
|
|
869
|
+
metadata,
|
|
823
870
|
run_id,
|
|
824
871
|
task_id,
|
|
825
872
|
self.flow,
|
|
@@ -845,12 +892,12 @@ class MetaflowTask(object):
|
|
|
845
892
|
max_user_code_retries,
|
|
846
893
|
self.ubf_context,
|
|
847
894
|
)
|
|
848
|
-
|
|
895
|
+
from_start("MetaflowTask: finished decorator processing")
|
|
849
896
|
if join_type:
|
|
850
897
|
self._exec_step_function(step_func, orig_step_func, input_obj)
|
|
851
898
|
else:
|
|
852
899
|
self._exec_step_function(step_func, orig_step_func)
|
|
853
|
-
|
|
900
|
+
from_start("MetaflowTask: step function executed")
|
|
854
901
|
for deco in decorators:
|
|
855
902
|
deco.task_post_step(
|
|
856
903
|
step_name,
|
|
@@ -893,6 +940,7 @@ class MetaflowTask(object):
|
|
|
893
940
|
raise
|
|
894
941
|
|
|
895
942
|
finally:
|
|
943
|
+
from_start("MetaflowTask: decorators finalized")
|
|
896
944
|
if self.ubf_context == UBF_CONTROL:
|
|
897
945
|
self._finalize_control_task()
|
|
898
946
|
|
|
@@ -932,7 +980,7 @@ class MetaflowTask(object):
|
|
|
932
980
|
)
|
|
933
981
|
|
|
934
982
|
output.save_metadata({"task_end": {}})
|
|
935
|
-
|
|
983
|
+
from_start("MetaflowTask: output persisted")
|
|
936
984
|
# this writes a success marker indicating that the
|
|
937
985
|
# "transaction" is done
|
|
938
986
|
output.done()
|
|
@@ -961,3 +1009,4 @@ class MetaflowTask(object):
|
|
|
961
1009
|
name="duration",
|
|
962
1010
|
payload={**task_payload, "msg": str(duration)},
|
|
963
1011
|
)
|
|
1012
|
+
from_start("MetaflowTask: task run completed")
|
|
@@ -379,7 +379,7 @@ class DelayEvaluator(collections.abc.Mapping):
|
|
|
379
379
|
to_eval_expr,
|
|
380
380
|
self._globals or globals(),
|
|
381
381
|
{
|
|
382
|
-
k: ConfigValue(v)
|
|
382
|
+
k: ConfigValue(v) if v is not None else None
|
|
383
383
|
for k, v in flow_cls._flow_state.get(_FlowState.CONFIGS, {}).items()
|
|
384
384
|
},
|
|
385
385
|
)
|
|
@@ -507,9 +507,7 @@ class Config(Parameter, collections.abc.Mapping):
|
|
|
507
507
|
self._delayed_evaluator = None
|
|
508
508
|
|
|
509
509
|
def load_parameter(self, v):
    """Wrap `v` in a ConfigValue; a value of None is returned unchanged."""
    if v is None:
        return None
    return ConfigValue(v)
|
|
513
511
|
|
|
514
512
|
def _store_value(self, v: Any) -> None:
|
|
515
513
|
self._computed_value = v
|
|
@@ -114,7 +114,7 @@ class MutableFlow:
|
|
|
114
114
|
for name, value in self._flow_cls._flow_state.get(
|
|
115
115
|
_FlowState.CONFIGS, {}
|
|
116
116
|
).items():
|
|
117
|
-
r = name, ConfigValue(value)
|
|
117
|
+
r = name, ConfigValue(value) if value is not None else None
|
|
118
118
|
debug.userconf_exec("Mutable flow yielding config: %s" % str(r))
|
|
119
119
|
yield r
|
|
120
120
|
|