metaflow 2.18.12__py2.py3-none-any.whl → 2.19.0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaflow/__init__.py +1 -0
- metaflow/cli.py +78 -13
- metaflow/cli_components/run_cmds.py +182 -39
- metaflow/cli_components/step_cmd.py +160 -4
- metaflow/client/__init__.py +1 -0
- metaflow/client/core.py +162 -99
- metaflow/client/filecache.py +59 -32
- metaflow/cmd/code/__init__.py +2 -1
- metaflow/datastore/__init__.py +1 -0
- metaflow/datastore/content_addressed_store.py +40 -9
- metaflow/datastore/datastore_set.py +10 -1
- metaflow/datastore/flow_datastore.py +123 -4
- metaflow/datastore/spin_datastore.py +91 -0
- metaflow/datastore/task_datastore.py +86 -2
- metaflow/decorators.py +75 -6
- metaflow/extension_support/__init__.py +372 -305
- metaflow/flowspec.py +3 -2
- metaflow/graph.py +2 -2
- metaflow/metaflow_config.py +41 -0
- metaflow/metaflow_profile.py +18 -0
- metaflow/packaging_sys/utils.py +2 -39
- metaflow/packaging_sys/v1.py +63 -16
- metaflow/plugins/__init__.py +2 -0
- metaflow/plugins/argo/argo_workflows.py +20 -25
- metaflow/plugins/argo/param_val.py +19 -0
- metaflow/plugins/cards/card_datastore.py +13 -13
- metaflow/plugins/cards/card_decorator.py +1 -0
- metaflow/plugins/cards/card_modules/basic.py +9 -3
- metaflow/plugins/datastores/local_storage.py +12 -6
- metaflow/plugins/datastores/spin_storage.py +12 -0
- metaflow/plugins/datatools/s3/s3.py +29 -10
- metaflow/plugins/datatools/s3/s3op.py +90 -62
- metaflow/plugins/metadata_providers/local.py +76 -82
- metaflow/plugins/metadata_providers/spin.py +16 -0
- metaflow/runner/click_api.py +4 -2
- metaflow/runner/metaflow_runner.py +210 -19
- metaflow/runtime.py +348 -21
- metaflow/task.py +61 -12
- metaflow/user_configs/config_parameters.py +2 -4
- metaflow/user_decorators/mutable_flow.py +1 -1
- metaflow/user_decorators/user_step_decorator.py +10 -1
- metaflow/util.py +191 -1
- metaflow/version.py +1 -1
- {metaflow-2.18.12.data → metaflow-2.19.0.data}/data/share/metaflow/devtools/Makefile +10 -0
- {metaflow-2.18.12.dist-info → metaflow-2.19.0.dist-info}/METADATA +2 -4
- {metaflow-2.18.12.dist-info → metaflow-2.19.0.dist-info}/RECORD +52 -48
- {metaflow-2.18.12.data → metaflow-2.19.0.data}/data/share/metaflow/devtools/Tiltfile +0 -0
- {metaflow-2.18.12.data → metaflow-2.19.0.data}/data/share/metaflow/devtools/pick_services.sh +0 -0
- {metaflow-2.18.12.dist-info → metaflow-2.19.0.dist-info}/WHEEL +0 -0
- {metaflow-2.18.12.dist-info → metaflow-2.19.0.dist-info}/entry_points.txt +0 -0
- {metaflow-2.18.12.dist-info → metaflow-2.19.0.dist-info}/licenses/LICENSE +0 -0
- {metaflow-2.18.12.dist-info → metaflow-2.19.0.dist-info}/top_level.txt +0 -0
metaflow/datastore/flow_datastore.py
CHANGED

```diff
@@ -1,10 +1,13 @@
 import itertools
 import json
+from abc import ABC, abstractmethod
 
 from .. import metaflow_config
 
 from .content_addressed_store import ContentAddressedStore
 from .task_datastore import TaskDataStore
+from .spin_datastore import SpinTaskDatastore
+from ..metaflow_profile import from_start
 
 
 class FlowDataStore(object):
@@ -63,10 +66,16 @@ class FlowDataStore(object):
             self._storage_impl.path_join(self.flow_name, "data"), self._storage_impl
         )
 
+        # Private
+        self._metadata_cache = None
+
     @property
     def datastore_root(self):
         return self._storage_impl.datastore_root
 
+    def set_metadata_cache(self, cache):
+        self._metadata_cache = cache
+
     def get_task_datastores(
         self,
         run_id=None,
@@ -76,6 +85,9 @@ class FlowDataStore(object):
         attempt=None,
         include_prior=False,
         mode="r",
+        join_type=None,
+        orig_flow_datastore=None,
+        spin_artifacts=None,
     ):
         """
         Return a list of TaskDataStore for a subset of the tasks.
@@ -95,7 +107,7 @@ class FlowDataStore(object):
             Steps to get the tasks from. If run_id is specified, this
             must also be specified, by default None
         pathspecs : List[str], optional
-            Full task specs (run_id/step_name/task_id). Can be used instead of
+            Full task specs (run_id/step_name/task_id[/attempt]). Can be used instead of
             specifying run_id and steps, by default None
         allow_not_done : bool, optional
             If True, returns the latest attempt of a task even if that attempt
@@ -106,6 +118,16 @@ class FlowDataStore(object):
             If True, returns all attempts up to and including attempt.
         mode : str, default "r"
             Mode to initialize the returned TaskDataStores in.
+        join_type : str, optional, default None
+            If specified, the join type for the task. This is used to determine
+            the user specified artifacts for the task in case of a spin task.
+        orig_flow_datastore : MetadataProvider, optional, default None
+            The metadata provider in case of a spin task. If provided, the
+            returned TaskDataStore will be a SpinTaskDatastore instead of a
+            TaskDataStore.
+        spin_artifacts : Dict[str, Any], optional, default None
+            Artifacts provided by user that can override the artifacts fetched via the
+            spin pathspec.
 
         Returns
         -------
```
```diff
@@ -145,7 +167,13 @@ class FlowDataStore(object):
         if attempt is not None and attempt <= metaflow_config.MAX_ATTEMPTS - 1:
             attempt_range = range(attempt + 1) if include_prior else [attempt]
         for task_url in task_urls:
-            for attempt in attempt_range:
+            task_splits = task_url.split("/")
+            # Usually it is flow, run, step, task (so 4 components) -- if we have a
+            # fifth one, there is a specific attempt number listed as well.
+            task_attempt_range = attempt_range
+            if len(task_splits) == 5:
+                task_attempt_range = [int(task_splits[4])]
+            for attempt in task_attempt_range:
                 for suffix in [
                     TaskDataStore.METADATA_DATA_SUFFIX,
                     TaskDataStore.METADATA_ATTEMPT_SUFFIX,
```
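The hunk above lets a task pathspec carry an optional fifth component that pins the attempt, e.g. `MyFlow/42/train/7/2` instead of `MyFlow/42/train/7`. For reference, a standalone sketch of the parsing rule (the helper name is illustrative, not part of Metaflow's API):

```python
# Sketch of the pathspec rule: "flow/run/step/task" plus an optional "/attempt".
def split_task_url(task_url, default_attempts):
    parts = task_url.split("/")
    if len(parts) == 5:
        # An explicit attempt pins the range to that single attempt.
        return parts[:4], [int(parts[4])]
    return parts, list(default_attempts)

_, attempts = split_task_url("MyFlow/42/train/7/2", range(3))
assert attempts == [2]
_, attempts = split_task_url("MyFlow/42/train/7", range(3))
assert attempts == [0, 1, 2]
```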
```diff
@@ -198,7 +226,18 @@ class FlowDataStore(object):
                 else (latest_started_attempts & done_attempts)
             )
             latest_to_fetch = [
-                (v[0], v[1], v[2], v[3], data_objs.get(v), mode, allow_not_done)
+                (
+                    v[0],
+                    v[1],
+                    v[2],
+                    v[3],
+                    data_objs.get(v),
+                    mode,
+                    allow_not_done,
+                    join_type,
+                    orig_flow_datastore,
+                    spin_artifacts,
+                )
                 for v in latest_to_fetch
             ]
             return list(itertools.starmap(self.get_task_datastore, latest_to_fetch))
@@ -212,8 +251,63 @@ class FlowDataStore(object):
         data_metadata=None,
         mode="r",
         allow_not_done=False,
+        join_type=None,
+        orig_flow_datastore=None,
+        spin_artifacts=None,
+        persist=True,
     ):
-        return TaskDataStore(
+        if orig_flow_datastore is not None:
+            # In spin step subprocess, use SpinTaskDatastore for accessing artifacts
+            if join_type is not None:
+                # If join_type is specified, we need to use the artifacts corresponding
+                # to that particular join index, specified by the parent task pathspec.
+                spin_artifacts = spin_artifacts.get(
+                    f"{run_id}/{step_name}/{task_id}", {}
+                )
+            from_start(
+                "FlowDataStore: get_task_datastore for spin task for type %s %s metadata"
+                % (self.TYPE, "without" if data_metadata is None else "with")
+            )
+            # Get the task datastore for the spun task.
+            orig_datastore = orig_flow_datastore.get_task_datastore(
+                run_id,
+                step_name,
+                task_id,
+                attempt=attempt,
+                data_metadata=data_metadata,
+                mode=mode,
+                allow_not_done=allow_not_done,
+                persist=persist,
+            )
+
+            return SpinTaskDatastore(
+                self.flow_name,
+                run_id,
+                step_name,
+                task_id,
+                orig_datastore,
+                spin_artifacts,
+            )
+
+        cache_hit = False
+        if (
+            self._metadata_cache is not None
+            and data_metadata is None
+            and attempt is not None
+            and allow_not_done is False
+        ):
+            # If we have a metadata cache, we can try to load the metadata
+            # from the cache if it is not provided.
+            data_metadata = self._metadata_cache.load_metadata(
+                run_id, step_name, task_id, attempt
+            )
+            cache_hit = data_metadata is not None
+
+        from_start(
+            "FlowDataStore: get_task_datastore for regular task for type %s %s metadata"
+            % (self.TYPE, "without" if data_metadata is None else "with")
+        )
+        task_datastore = TaskDataStore(
             self,
             run_id,
             step_name,
@@ -222,8 +316,23 @@ class FlowDataStore(object):
             data_metadata=data_metadata,
             mode=mode,
             allow_not_done=allow_not_done,
+            persist=persist,
         )
 
+        # Only persist in cache if it is non-changing (so done only) and we have
+        # a non-None attempt
+        if (
+            not cache_hit
+            and self._metadata_cache is not None
+            and allow_not_done is False
+            and attempt is not None
+        ):
+            self._metadata_cache.store_metadata(
+                run_id, step_name, task_id, attempt, task_datastore.ds_metadata
+            )
+
+        return task_datastore
+
     def save_data(self, data_iter, len_hint=0):
         """Saves data to the underlying content-addressed store
 
@@ -265,3 +374,13 @@ class FlowDataStore(object):
         """
         for key, blob in self.ca_store.load_blobs(keys, force_raw=force_raw):
             yield key, blob
+
+
+class MetadataCache(ABC):
+    @abstractmethod
+    def load_metadata(self, run_id, step_name, task_id, attempt):
+        raise NotImplementedError()
+
+    @abstractmethod
+    def store_metadata(self, run_id, step_name, task_id, attempt, metadata_dict):
+        raise NotImplementedError()
```
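`get_task_datastore` now consults an optional metadata cache (attached via `set_metadata_cache`) before constructing a `TaskDataStore`, and the `MetadataCache` ABC above defines the interface it expects. A minimal in-memory sketch of a conforming implementation, assuming only what the ABC requires (`DictMetadataCache` is hypothetical, not shipped in this wheel):

```python
from abc import ABC, abstractmethod

class MetadataCache(ABC):
    # Mirrors the ABC added to flow_datastore.py above.
    @abstractmethod
    def load_metadata(self, run_id, step_name, task_id, attempt): ...

    @abstractmethod
    def store_metadata(self, run_id, step_name, task_id, attempt, metadata_dict): ...

class DictMetadataCache(MetadataCache):
    """Hypothetical in-memory cache keyed by (run, step, task, attempt)."""

    def __init__(self):
        self._entries = {}

    def load_metadata(self, run_id, step_name, task_id, attempt):
        # Returning None signals a cache miss to get_task_datastore.
        return self._entries.get((run_id, step_name, task_id, attempt))

    def store_metadata(self, run_id, step_name, task_id, attempt, metadata_dict):
        # Only finished ("done") attempts are stored, so entries never go stale.
        self._entries[(run_id, step_name, task_id, attempt)] = metadata_dict

cache = DictMetadataCache()
cache.store_metadata("42", "train", "7", 0, {"objects": {}, "info": {}})
assert cache.load_metadata("42", "train", "7", 0) is not None
assert cache.load_metadata("42", "train", "7", 1) is None  # miss
```

Note that the diff only consults and fills the cache when a concrete `attempt` is given and `allow_not_done` is False, i.e. once the attempt's metadata can no longer change.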
metaflow/datastore/spin_datastore.py
ADDED

```diff
@@ -0,0 +1,91 @@
+from typing import Dict, Any
+from .task_datastore import TaskDataStore, require_mode
+from ..metaflow_profile import from_start
+
+
+class SpinTaskDatastore(object):
+    def __init__(
+        self,
+        flow_name: str,
+        run_id: str,
+        step_name: str,
+        task_id: str,
+        orig_datastore: TaskDataStore,
+        spin_artifacts: Dict[str, Any],
+    ):
+        """
+        SpinTaskDatastore is a datastore for a task that is used to retrieve
+        artifacts and attributes for a spin step. It uses the task pathspec
+        from a previous execution of the step to access the artifacts and attributes.
+
+        Parameters:
+        -----------
+        flow_name : str
+            Name of the flow
+        run_id : str
+            Run ID of the flow
+        step_name : str
+            Name of the step
+        task_id : str
+            Task ID of the step
+        orig_datastore : TaskDataStore
+            The datastore for the underlying task that is being spun.
+        spin_artifacts : Dict[str, Any]
+            User provided artifacts that are to be used in the spin task. This is a dictionary
+            where keys are artifact names and values are the actual data or metadata.
+        """
+        self.flow_name = flow_name
+        self.run_id = run_id
+        self.step_name = step_name
+        self.task_id = task_id
+        self.orig_datastore = orig_datastore
+        self.spin_artifacts = spin_artifacts
+        self._task = None
+
+        # Update _objects and _info in order to persist artifacts
+        # See `persist` method in `TaskDatastore` for more details
+        self._objects = self.orig_datastore._objects.copy()
+        self._info = self.orig_datastore._info.copy()
+
+        # We strip out some of the control ones
+        for key in ("_transition",):
+            if key in self._objects:
+                del self._objects[key]
+                del self._info[key]
+
+        from_start("SpinTaskDatastore: Initialized artifacts")
+
+    @require_mode(None)
+    def __getitem__(self, name):
+        try:
+            # Check if it's an artifact in the spin_artifacts
+            return self.spin_artifacts[name]
+        except KeyError:
+            try:
+                # Check if it's an attribute of the task
+                # _foreach_stack, _foreach_index, ...
+                return self.orig_datastore[name]
+            except (KeyError, AttributeError) as e:
+                raise KeyError(
+                    f"Attribute '{name}' not found in the previous execution "
+                    f"of the tasks for `{self.step_name}`."
+                ) from e
+
+    @require_mode(None)
+    def is_none(self, name):
+        val = self.__getitem__(name)
+        return val is None
+
+    @require_mode(None)
+    def __contains__(self, name):
+        try:
+            _ = self.__getitem__(name)
+            return True
+        except KeyError:
+            return False
+
+    @require_mode(None)
+    def items(self):
+        if self._objects:
+            return self._objects.items()
+        return {}
```
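The lookup order in `__getitem__` above is the heart of spin's artifact override: user-supplied `spin_artifacts` win, and anything else falls through to the original task's datastore. A stripped-down model of that precedence rule, with plain dicts standing in for both stores (class and variable names here are illustrative, not Metaflow API):

```python
# Stripped-down model of SpinTaskDatastore.__getitem__: overrides win,
# otherwise fall back to the artifacts recorded by the original task.
class ArtifactView:
    def __init__(self, original, overrides):
        self.original = original    # stands in for the original TaskDataStore
        self.overrides = overrides  # stands in for user-supplied spin_artifacts

    def __getitem__(self, name):
        try:
            return self.overrides[name]
        except KeyError:
            try:
                return self.original[name]
            except KeyError as e:
                raise KeyError(f"Attribute '{name}' not found") from e

view = ArtifactView(original={"alpha": 0.1, "model": "resnet"},
                    overrides={"alpha": 0.5})
assert view["alpha"] == 0.5       # user override shadows the recorded artifact
assert view["model"] == "resnet"  # everything else comes from the original task
```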
metaflow/datastore/task_datastore.py
CHANGED

```diff
@@ -6,6 +6,7 @@ import time
 
 from functools import wraps
 from io import BufferedIOBase, FileIO, RawIOBase
+from typing import List, Optional
 from types import MethodType, FunctionType
 
 from .. import metaflow_config
@@ -98,6 +99,7 @@ class TaskDataStore(object):
         data_metadata=None,
         mode="r",
         allow_not_done=False,
+        persist=True,
     ):
         self._storage_impl = flow_datastore._storage_impl
         self.TYPE = self._storage_impl.TYPE
@@ -113,6 +115,7 @@ class TaskDataStore(object):
         self._attempt = attempt
         self._metadata = flow_datastore.metadata
         self._parent = flow_datastore
+        self._persist = persist
 
         # The GZIP encodings are for backward compatibility
         self._encodings = {"pickle-v2", "gzip+pickle-v2"}
@@ -148,6 +151,8 @@ class TaskDataStore(object):
                 )
                 if self.has_metadata(check_meta, add_attempt=False):
                     max_attempt = i
+                elif max_attempt is not None:
+                    break
         if self._attempt is None:
             self._attempt = max_attempt
         elif max_attempt is None or self._attempt > max_attempt:
@@ -253,6 +258,72 @@ class TaskDataStore(object):
         """
         self.save_metadata({self.METADATA_ATTEMPT_SUFFIX: {"time": time.time()}})
 
+    @only_if_not_done
+    @require_mode("w")
+    def transfer_artifacts(
+        self, other_datastore: "TaskDataStore", names: Optional[List[str]] = None
+    ):
+        """
+        Copies the blobs from other_datastore to this datastore if the datastore roots
+        are different.
+
+        This is used specifically for spin so we can bring in artifacts from the original
+        datastore.
+
+        Parameters
+        ----------
+        other_datastore : TaskDataStore
+            Other datastore from which to copy artifacts from
+        names : List[str], optional, default None
+            If provided, only transfer the artifacts with these names. If None,
+            transfer all artifacts from the other datastore.
+        """
+        if (
+            other_datastore.TYPE == self.TYPE
+            and other_datastore._storage_impl.datastore_root
+            == self._storage_impl.datastore_root
+        ):
+            # Nothing to transfer -- artifacts are already saved properly
+            return
+
+        # Determine which artifacts need to be transferred
+        if names is None:
+            # Transfer all artifacts from other datastore
+            artifacts_to_transfer = list(other_datastore._objects.keys())
+        else:
+            # Transfer only specified artifacts
+            artifacts_to_transfer = [
+                name for name in names if name in other_datastore._objects
+            ]
+
+        if not artifacts_to_transfer:
+            return
+
+        # Get SHA keys for artifacts to transfer
+        shas_to_transfer = [
+            other_datastore._objects[name] for name in artifacts_to_transfer
+        ]
+
+        # Check which blobs are missing locally
+        missing_shas = []
+        for sha in shas_to_transfer:
+            local_path = self._ca_store._storage_impl.path_join(
+                self._ca_store._prefix, sha[:2], sha
+            )
+            if not self._ca_store._storage_impl.is_file([local_path])[0]:
+                missing_shas.append(sha)
+
+        if not missing_shas:
+            return  # All blobs already exist locally
+
+        # Load blobs from other datastore in transfer mode
+        transfer_blobs = other_datastore._ca_store.load_blobs(
+            missing_shas, is_transfer=True
+        )
+
+        # Save blobs to local datastore in transfer mode
+        self._ca_store.save_blobs(transfer_blobs, is_transfer=True)
+
     @only_if_not_done
     @require_mode("w")
     def save_artifacts(self, artifacts_iter, len_hint=0):
```
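`transfer_artifacts` follows a copy-only-missing pattern over the content-addressed store: resolve artifact names to SHA keys, probe which blobs are absent at the destination, and copy just those. A self-contained sketch of the same logic, with dicts standing in for the two blob stores (the real code goes through `ContentAddressedStore` and a storage backend):

```python
# Copy-only-missing transfer between two content-addressed stores,
# modeled as dicts mapping sha -> blob bytes.
def transfer_missing_blobs(src_store, dst_store, objects, names=None):
    # objects maps artifact name -> sha, mirroring TaskDataStore._objects.
    wanted = objects.keys() if names is None else [n for n in names if n in objects]
    shas = [objects[name] for name in wanted]
    missing = [sha for sha in shas if sha not in dst_store]
    for sha in missing:
        dst_store[sha] = src_store[sha]
    return missing

src = {"aa11": b"blob-1", "bb22": b"blob-2"}
dst = {"aa11": b"blob-1"}  # one blob already present at the destination
moved = transfer_missing_blobs(src, dst, {"x": "aa11", "y": "bb22"})
assert moved == ["bb22"] and dst["bb22"] == b"blob-2"
```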
```diff
@@ -683,14 +754,16 @@ class TaskDataStore(object):
         flow : FlowSpec
             Flow to persist
         """
+        if not self._persist:
+            return
 
         if flow._datastore:
             self._objects.update(flow._datastore._objects)
             self._info.update(flow._datastore._info)
 
-        # we create a list of valid_artifacts in advance, outside of
-        # artifacts_iter, so we can provide a len_hint below
+        # Scan flow object FIRST
         valid_artifacts = []
+        current_artifact_names = set()
         for var in dir(flow):
             if var.startswith("__") or var in flow._EPHEMERAL:
                 continue
@@ -707,6 +780,16 @@ class TaskDataStore(object):
                 or isinstance(val, Parameter)
             ):
                 valid_artifacts.append((var, val))
+                current_artifact_names.add(var)
+
+        # Transfer ONLY artifacts that aren't being overridden
+        if hasattr(flow._datastore, "orig_datastore"):
+            parent_artifacts = set(flow._datastore._objects.keys())
+            unchanged_artifacts = parent_artifacts - current_artifact_names
+            if unchanged_artifacts:
+                self.transfer_artifacts(
+                    flow._datastore.orig_datastore, names=list(unchanged_artifacts)
+                )
 
         def artifacts_iter():
             # we consume the valid_artifacts list destructively to
@@ -722,6 +805,7 @@ class TaskDataStore(object):
                     delattr(flow, var)
                 yield var, val
 
+        # Save current artifacts
        self.save_artifacts(artifacts_iter(), len_hint=len(valid_artifacts))
 
     @only_if_not_done
```
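The `persist` change decides what to pull from the original datastore with a plain set difference: everything the parent task recorded, minus whatever the spin step reassigned. A worked example of that rule (values made up):

```python
# Which artifacts must be copied over from the original task's datastore?
parent_objects = {"alpha": "sha-a", "model": "sha-m", "metrics": "sha-x"}
reassigned_in_spin_step = {"metrics"}  # names collected in current_artifact_names

unchanged = set(parent_objects) - reassigned_in_spin_step
assert unchanged == {"alpha", "model"}  # only these go through transfer_artifacts();
# "metrics" is re-saved from the new value the spin step produced.
```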
metaflow/decorators.py
CHANGED

```diff
@@ -27,7 +27,7 @@ from .user_decorators.user_step_decorator import (
     UserStepDecoratorBase,
     UserStepDecoratorMeta,
 )
-
+from .metaflow_config import SPIN_ALLOWED_DECORATORS
 from metaflow._vendor import click
 
 
@@ -658,6 +658,50 @@ def _attach_decorators_to_step(step, decospecs):
         step_deco.add_or_raise(step, False, 1, None)
 
 
+def _should_skip_decorator_for_spin(
+    deco, is_spin, skip_decorators, logger, decorator_type="decorator"
+):
+    """
+    Determine if a decorator should be skipped for spin steps.
+
+    Parameters:
+    -----------
+    deco : Decorator
+        The decorator instance to check
+    is_spin : bool
+        Whether this is a spin step
+    skip_decorators : bool
+        Whether to skip all decorators
+    logger : callable
+        Logger function for warnings
+    decorator_type : str
+        Type of decorator ("Flow decorator" or "Step decorator") for logging
+
+    Returns:
+    --------
+    bool
+        True if the decorator should be skipped, False otherwise
+    """
+    if not is_spin:
+        return False
+
+    # Skip all decorator hooks if skip_decorators is True
+    if skip_decorators:
+        return True
+
+    # Run decorator hooks for spin steps only if they are in the whitelist
+    if deco.name not in SPIN_ALLOWED_DECORATORS:
+        logger(
+            f"[Warning] Ignoring {decorator_type} '{deco.name}' as it is not supported in spin steps.",
+            system_msg=True,
+            timestamp=False,
+            bad=True,
+        )
+        return True
+
+    return False
+
+
 def _init(flow, only_non_static=False):
     for decorators in flow._flow_decorators.values():
         for deco in decorators:
```
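The decision table here is: outside spin, never skip; in spin with `skip_decorators`, skip everything; otherwise skip (and warn about) any decorator not on the whitelist. A self-contained sketch of the same logic (the `SPIN_ALLOWED_DECORATORS` value below is a stand-in set, not the actual default from `metaflow_config`):

```python
SPIN_ALLOWED_DECORATORS = {"environment", "pypi", "conda"}  # stand-in whitelist

def should_skip(deco_name, is_spin, skip_decorators, warn=print):
    if not is_spin:
        return False  # normal runs execute every decorator hook
    if skip_decorators:
        return True   # spin with decorators disabled: skip all hooks
    if deco_name not in SPIN_ALLOWED_DECORATORS:
        warn(f"[Warning] Ignoring decorator '{deco_name}' in spin step.")
        return True   # spin: skip anything outside the whitelist
    return False

assert should_skip("batch", is_spin=False, skip_decorators=False) is False
assert should_skip("environment", is_spin=True, skip_decorators=False) is False
assert should_skip("batch", is_spin=True, skip_decorators=False) is True
```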
```diff
@@ -673,7 +717,16 @@ def _init(flow, only_non_static=False):
 
 
 def _init_flow_decorators(
-    flow, graph, environment, flow_datastore, metadata, logger, echo, deco_options
+    flow,
+    graph,
+    environment,
+    flow_datastore,
+    metadata,
+    logger,
+    echo,
+    deco_options,
+    is_spin=False,
+    skip_decorators=False,
 ):
     # Since all flow decorators are stored as `{key:[deco]}` we iterate through each of them.
     for decorators in flow._flow_decorators.values():
@@ -702,6 +755,10 @@ def _init_flow_decorators(
                 for option, option_info in deco.options.items()
             }
             for deco in decorators:
+                if _should_skip_decorator_for_spin(
+                    deco, is_spin, skip_decorators, logger, "Flow decorator"
+                ):
+                    continue
                 deco.flow_init(
                     flow,
                     graph,
@@ -714,8 +771,16 @@ def _init_flow_decorators(
             )
 
 
-def _init_step_decorators(flow, graph, environment, flow_datastore, logger):
-    # NOTE: We don't need the graph but keeping it for backwards compatibility with
+def _init_step_decorators(
+    flow,
+    graph,
+    environment,
+    flow_datastore,
+    logger,
+    is_spin=False,
+    skip_decorators=False,
+):
+    # NOTE: We don't need the graph but keeping it for backwards compatibility with
     # extensions that use it directly. We will remove it at some point.
 
     # We call the mutate method for both the flow and step mutators.
@@ -741,7 +806,7 @@ def _init_step_decorators(flow, graph, environment, flow_datastore, logger):
                     "expected %s but got %s" % (deco._flow_cls.__name__, cls.__name__)
                 )
             debug.userconf_exec(
-                "Evaluating flow level decorator %s (
+                "Evaluating flow level decorator %s (mutate)" % deco.__class__.__name__
             )
             deco.mutate(mutable_flow)
             # We reset cached_parameters on the very off chance that the user added
@@ -759,7 +824,7 @@ def _init_step_decorators(flow, graph, environment, flow_datastore, logger):
 
             if isinstance(deco, StepMutator):
                 debug.userconf_exec(
-                    "Evaluating step level decorator %s
+                    "Evaluating step level decorator %s for %s (mutate)"
                     % (deco.__class__.__name__, step.name)
                 )
                 deco.mutate(
@@ -785,6 +850,10 @@ def _init_step_decorators(flow, graph, environment, flow_datastore, logger):
 
     for step in flow:
         for deco in step.decorators:
+            if _should_skip_decorator_for_spin(
+                deco, is_spin, skip_decorators, logger, "Step decorator"
+            ):
+                continue
             deco.step_init(
                 flow,
                 graph,
```