metaflow 2.18.12__py2.py3-none-any.whl → 2.19.0__py2.py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Files changed (52)
  1. metaflow/__init__.py +1 -0
  2. metaflow/cli.py +78 -13
  3. metaflow/cli_components/run_cmds.py +182 -39
  4. metaflow/cli_components/step_cmd.py +160 -4
  5. metaflow/client/__init__.py +1 -0
  6. metaflow/client/core.py +162 -99
  7. metaflow/client/filecache.py +59 -32
  8. metaflow/cmd/code/__init__.py +2 -1
  9. metaflow/datastore/__init__.py +1 -0
  10. metaflow/datastore/content_addressed_store.py +40 -9
  11. metaflow/datastore/datastore_set.py +10 -1
  12. metaflow/datastore/flow_datastore.py +123 -4
  13. metaflow/datastore/spin_datastore.py +91 -0
  14. metaflow/datastore/task_datastore.py +86 -2
  15. metaflow/decorators.py +75 -6
  16. metaflow/extension_support/__init__.py +372 -305
  17. metaflow/flowspec.py +3 -2
  18. metaflow/graph.py +2 -2
  19. metaflow/metaflow_config.py +41 -0
  20. metaflow/metaflow_profile.py +18 -0
  21. metaflow/packaging_sys/utils.py +2 -39
  22. metaflow/packaging_sys/v1.py +63 -16
  23. metaflow/plugins/__init__.py +2 -0
  24. metaflow/plugins/argo/argo_workflows.py +20 -25
  25. metaflow/plugins/argo/param_val.py +19 -0
  26. metaflow/plugins/cards/card_datastore.py +13 -13
  27. metaflow/plugins/cards/card_decorator.py +1 -0
  28. metaflow/plugins/cards/card_modules/basic.py +9 -3
  29. metaflow/plugins/datastores/local_storage.py +12 -6
  30. metaflow/plugins/datastores/spin_storage.py +12 -0
  31. metaflow/plugins/datatools/s3/s3.py +29 -10
  32. metaflow/plugins/datatools/s3/s3op.py +90 -62
  33. metaflow/plugins/metadata_providers/local.py +76 -82
  34. metaflow/plugins/metadata_providers/spin.py +16 -0
  35. metaflow/runner/click_api.py +4 -2
  36. metaflow/runner/metaflow_runner.py +210 -19
  37. metaflow/runtime.py +348 -21
  38. metaflow/task.py +61 -12
  39. metaflow/user_configs/config_parameters.py +2 -4
  40. metaflow/user_decorators/mutable_flow.py +1 -1
  41. metaflow/user_decorators/user_step_decorator.py +10 -1
  42. metaflow/util.py +191 -1
  43. metaflow/version.py +1 -1
  44. {metaflow-2.18.12.data → metaflow-2.19.0.data}/data/share/metaflow/devtools/Makefile +10 -0
  45. {metaflow-2.18.12.dist-info → metaflow-2.19.0.dist-info}/METADATA +2 -4
  46. {metaflow-2.18.12.dist-info → metaflow-2.19.0.dist-info}/RECORD +52 -48
  47. {metaflow-2.18.12.data → metaflow-2.19.0.data}/data/share/metaflow/devtools/Tiltfile +0 -0
  48. {metaflow-2.18.12.data → metaflow-2.19.0.data}/data/share/metaflow/devtools/pick_services.sh +0 -0
  49. {metaflow-2.18.12.dist-info → metaflow-2.19.0.dist-info}/WHEEL +0 -0
  50. {metaflow-2.18.12.dist-info → metaflow-2.19.0.dist-info}/entry_points.txt +0 -0
  51. {metaflow-2.18.12.dist-info → metaflow-2.19.0.dist-info}/licenses/LICENSE +0 -0
  52. {metaflow-2.18.12.dist-info → metaflow-2.19.0.dist-info}/top_level.txt +0 -0
metaflow/runtime.py CHANGED
@@ -26,20 +26,29 @@ from metaflow.datastore.exceptions import DataException
 from contextlib import contextmanager
 
 from . import get_namespace
+from .client.filecache import FileCache, FileBlobCache, TaskMetadataCache
 from .metadata_provider import MetaDatum
-from .metaflow_config import FEAT_ALWAYS_UPLOAD_CODE_PACKAGE, MAX_ATTEMPTS, UI_URL
+from .metaflow_config import (
+    FEAT_ALWAYS_UPLOAD_CODE_PACKAGE,
+    MAX_ATTEMPTS,
+    UI_URL,
+    SPIN_ALLOWED_DECORATORS,
+    SPIN_DISALLOWED_DECORATORS,
+)
+from .metaflow_profile import from_start
+from .plugins import DATASTORES
 from .exception import (
     MetaflowException,
     MetaflowInternalError,
     METAFLOW_EXIT_DISALLOW_RETRY,
 )
 from . import procpoll
-from .datastore import TaskDataStoreSet
+from .datastore import FlowDataStore, TaskDataStoreSet
 from .debug import debug
 from .decorators import flow_decorators
 from .flowspec import _FlowState
 from .mflog import mflog, RUNTIME_LOG_SOURCE
-from .util import to_unicode, compress_list, unicode_type
+from .util import to_unicode, compress_list, unicode_type, get_latest_task_pathspec
 from .clone_util import clone_task_helper
 from .unbounded_foreach import (
     CONTROL_TASK_TAG,
@@ -85,6 +94,253 @@ mflog_msg = partial(mflog.decorate, RUNTIME_LOG_SOURCE)
 # TODO option: output dot graph periodically about execution
 
 
+class SpinRuntime(object):
+    def __init__(
+        self,
+        flow,
+        graph,
+        flow_datastore,
+        metadata,
+        environment,
+        package,
+        logger,
+        entrypoint,
+        event_logger,
+        monitor,
+        step_func,
+        step_name,
+        spin_pathspec,
+        skip_decorators=False,
+        artifacts_module=None,
+        persist=True,
+        max_log_size=MAX_LOG_SIZE,
+    ):
+        from metaflow import Task
+
+        self._flow = flow
+        self._graph = graph
+        self._flow_datastore = flow_datastore
+        self._metadata = metadata
+        self._environment = environment
+        self._package = package
+        self._logger = logger
+        self._entrypoint = entrypoint
+        self._event_logger = event_logger
+        self._monitor = monitor
+
+        self._step_func = step_func
+
+        # Determine if we have a complete pathspec or need to get the task
+        if spin_pathspec:
+            parts = spin_pathspec.split("/")
+            if len(parts) == 4:
+                # Complete pathspec: flow/run/step/task_id
+                try:
+                    # If user provides whole pathspec, we do not need to check namespace
+                    task = Task(spin_pathspec, _namespace_check=False)
+                except Exception:
+                    raise MetaflowException(
+                        f"Invalid pathspec: {spin_pathspec} for step: {step_name}"
+                    )
+            elif len(parts) == 3:
+                # Partial pathspec: flow/run/step - need to get the task
+                _, run_id, _ = parts
+                task = get_latest_task_pathspec(flow.name, step_name, run_id=run_id)
+                logger(
+                    f"To make spin even faster, provide complete pathspec with task_id: {task.pathspec}",
+                    system_msg=True,
+                )
+            else:
+                raise MetaflowException(
+                    f"Invalid pathspec format: {spin_pathspec}. Expected flow/run/step or flow/run/step/task_id"
+                )
+        else:
+            # No pathspec provided, get latest task for this step
+            task = get_latest_task_pathspec(flow.name, step_name)
+            logger(
+                f"To make spin even faster, provide complete pathspec {task.pathspec}",
+                system_msg=True,
+            )
+        from_start("SpinRuntime: after getting task")
+
+        # Get the original FlowDatastore so we can use it to access artifacts from the
+        # spun task
+        meta_dict = task.metadata_dict
+        ds_type = meta_dict["ds-type"]
+        ds_root = meta_dict["ds-root"]
+        orig_datastore_impl = [d for d in DATASTORES if d.TYPE == ds_type][0]
+        orig_datastore_impl.datastore_root = ds_root
+        spin_pathspec = task.pathspec
+        orig_flow_datastore = FlowDataStore(
+            flow.name,
+            environment=None,
+            storage_impl=orig_datastore_impl,
+            ds_root=ds_root,
+        )
+
+        self._filecache = FileCache()
+        orig_flow_datastore.set_metadata_cache(
+            TaskMetadataCache(self._filecache, ds_type, ds_root, flow.name)
+        )
+        orig_flow_datastore.ca_store.set_blob_cache(
+            FileBlobCache(
+                self._filecache, FileCache.flow_ds_id(ds_type, ds_root, flow.name)
+            )
+        )
+
+        self._orig_flow_datastore = orig_flow_datastore
+        self._spin_pathspec = spin_pathspec
+        self._persist = persist
+        self._spin_task = task
+        self._input_paths = None
+        self._split_index = None
+        self._whitelist_decorators = None
+        self._config_file_name = None
+        self._skip_decorators = skip_decorators
+        self._artifacts_module = artifacts_module
+        self._max_log_size = max_log_size
+        self._encoding = sys.stdout.encoding or "UTF-8"
+
+        # Create a new run_id for the spin task
+        self.run_id = self._metadata.new_run_id()
+        # Raise exception if we have a black listed decorator
+        for deco in self._step_func.decorators:
+            if deco.name in SPIN_DISALLOWED_DECORATORS:
+                raise MetaflowException(
+                    f"Spinning steps with @{deco.name} decorator is not supported."
+                )
+
+        for deco in self.whitelist_decorators:
+            deco.runtime_init(flow, graph, package, self.run_id)
+        from_start("SpinRuntime: after init decorators")
+
+    @property
+    def split_index(self):
+        """
+        Returns the split index, caching the result after the first access.
+        """
+        if self._split_index is None:
+            self._split_index = getattr(self._spin_task, "index", None)
+
+        return self._split_index
+
+    @property
+    def input_paths(self):
+        def _format_input_paths(task_pathspec, attempt):
+            _, run_id, step_name, task_id = task_pathspec.split("/")
+            return f"{run_id}/{step_name}/{task_id}/{attempt}"
+
+        if self._input_paths:
+            return self._input_paths
+
+        if self._step_func.name == "start":
+            from metaflow import Step
+
+            flow_name, run_id, _, _ = self._spin_pathspec.split("/")
+            task = Step(
+                f"{flow_name}/{run_id}/_parameters", _namespace_check=False
+            ).task
+            self._input_paths = [
+                _format_input_paths(task.pathspec, task.current_attempt)
+            ]
+        else:
+            parent_tasks = self._spin_task.parent_tasks
+            self._input_paths = [
+                _format_input_paths(t.pathspec, t.current_attempt) for t in parent_tasks
+            ]
+        return self._input_paths
+
+    @property
+    def whitelist_decorators(self):
+        if self._skip_decorators:
+            self._whitelist_decorators = []
+            return self._whitelist_decorators
+        if self._whitelist_decorators:
+            return self._whitelist_decorators
+        self._whitelist_decorators = [
+            deco
+            for deco in self._step_func.decorators
+            if any(deco.name.startswith(prefix) for prefix in SPIN_ALLOWED_DECORATORS)
+        ]
+        return self._whitelist_decorators
+
+    def _new_task(self, step, input_paths=None, **kwargs):
+        return Task(
+            flow_datastore=self._flow_datastore,
+            flow=self._flow,
+            step=step,
+            run_id=self.run_id,
+            metadata=self._metadata,
+            environment=self._environment,
+            entrypoint=self._entrypoint,
+            event_logger=self._event_logger,
+            monitor=self._monitor,
+            input_paths=input_paths,
+            decos=self.whitelist_decorators,
+            logger=self._logger,
+            split_index=self.split_index,
+            **kwargs,
+        )
+
+    def execute(self):
+        exception = None
+        with tempfile.NamedTemporaryFile(mode="w", encoding="utf-8") as config_file:
+            config_value = dump_config_values(self._flow)
+            if config_value:
+                json.dump(config_value, config_file)
+                config_file.flush()
+                self._config_file_name = config_file.name
+            else:
+                self._config_file_name = None
+            from_start("SpinRuntime: config values processed")
+            self.task = self._new_task(self._step_func.name, self.input_paths)
+            try:
+                self._launch_and_monitor_task()
+            except Exception as ex:
+                self._logger("Task failed.", system_msg=True, bad=True)
+                exception = ex
+                raise
+            finally:
+                for deco in self.whitelist_decorators:
+                    deco.runtime_finished(exception)
+
+    def _launch_and_monitor_task(self):
+        worker = Worker(
+            self.task,
+            self._max_log_size,
+            self._config_file_name,
+            orig_flow_datastore=self._orig_flow_datastore,
+            spin_pathspec=self._spin_pathspec,
+            artifacts_module=self._artifacts_module,
+            persist=self._persist,
+            skip_decorators=self._skip_decorators,
+        )
+        from_start("SpinRuntime: created worker")
+
+        poll = procpoll.make_poll()
+        fds = worker.fds()
+        for fd in fds:
+            poll.add(fd)
+
+        active_fds = set(fds)
+
+        while active_fds:
+            events = poll.poll(POLL_TIMEOUT)
+            for event in events:
+                if event.can_read:
+                    worker.read_logline(event.fd)
+                if event.is_terminated:
+                    poll.remove(event.fd)
+                    active_fds.remove(event.fd)
+        from_start("SpinRuntime: read loglines")
+        returncode = worker.terminate()
+        from_start("SpinRuntime: worker terminated")
+        if returncode != 0:
+            raise TaskFailed(self.task, f"Task failed with return code {returncode}")
+        else:
+            self._logger("Task finished successfully.", system_msg=True)
+
+
 class NativeRuntime(object):
     def __init__(
         self,
@@ -1769,8 +2025,27 @@ class CLIArgs(object):
    for step execution in StepDecorator.runtime_step_cli().
    """
 
-    def __init__(self, task):
+    def __init__(
+        self,
+        task,
+        orig_flow_datastore=None,
+        spin_pathspec=None,
+        artifacts_module=None,
+        persist=True,
+        skip_decorators=False,
+    ):
         self.task = task
+        if orig_flow_datastore is not None:
+            self.orig_flow_datastore = "%s@%s" % (
+                orig_flow_datastore.TYPE,
+                orig_flow_datastore.datastore_root,
+            )
+        else:
+            self.orig_flow_datastore = None
+        self.spin_pathspec = spin_pathspec
+        self.artifacts_module = artifacts_module
+        self.persist = persist
+        self.skip_decorators = skip_decorators
         self.entrypoint = list(task.entrypoint)
         step_obj = getattr(self.task.flow, self.task.step)
         self.top_level_options = {
@@ -1808,21 +2083,49 @@ class CLIArgs(object):
             (k, ConfigInput.make_key_name(k)) for k in configs
         ]
 
+        if spin_pathspec:
+            self.spin_args()
+        else:
+            self.default_args()
+
+    def default_args(self):
         self.commands = ["step"]
         self.command_args = [self.task.step]
         self.command_options = {
-            "run-id": task.run_id,
-            "task-id": task.task_id,
-            "input-paths": compress_list(task.input_paths),
-            "split-index": task.split_index,
-            "retry-count": task.retries,
-            "max-user-code-retries": task.user_code_retries,
-            "tag": task.tags,
+            "run-id": self.task.run_id,
+            "task-id": self.task.task_id,
+            "input-paths": compress_list(self.task.input_paths),
+            "split-index": self.task.split_index,
+            "retry-count": self.task.retries,
+            "max-user-code-retries": self.task.user_code_retries,
+            "tag": self.task.tags,
             "namespace": get_namespace() or "",
-            "ubf-context": task.ubf_context,
+            "ubf-context": self.task.ubf_context,
         }
         self.env = {}
 
+    def spin_args(self):
+        self.commands = ["spin-step"]
+        self.command_args = [self.task.step]
+
+        self.command_options = {
+            "run-id": self.task.run_id,
+            "task-id": self.task.task_id,
+            "input-paths": compress_list(self.task.input_paths),
+            "split-index": self.task.split_index,
+            "retry-count": self.task.retries,
+            "max-user-code-retries": self.task.user_code_retries,
+            "namespace": get_namespace() or "",
+            "orig-flow-datastore": self.orig_flow_datastore,
+            "artifacts-module": self.artifacts_module,
+            "skip-decorators": self.skip_decorators,
+        }
+        if self.persist:
+            self.command_options["persist"] = True
+        else:
+            self.command_options["no-persist"] = True
+        self.env = {}
+
     def get_args(self):
        # TODO: Make one with dict_to_cli_options; see cli_args.py for more detail
        def _options(mapping):
@@ -1861,9 +2164,24 @@ class CLIArgs(object):
 
 
 class Worker(object):
-    def __init__(self, task, max_logs_size, config_file_name):
+    def __init__(
+        self,
+        task,
+        max_logs_size,
+        config_file_name,
+        orig_flow_datastore=None,
+        spin_pathspec=None,
+        artifacts_module=None,
+        persist=True,
+        skip_decorators=False,
+    ):
         self.task = task
         self._config_file_name = config_file_name
+        self._orig_flow_datastore = orig_flow_datastore
+        self._spin_pathspec = spin_pathspec
+        self._artifacts_module = artifacts_module
+        self._skip_decorators = skip_decorators
+        self._persist = persist
         self._proc = self._launch()
 
         if task.retries > task.user_code_retries:
@@ -1895,7 +2213,14 @@ class Worker(object):
    # not it is properly shut down)
 
    def _launch(self):
-        args = CLIArgs(self.task)
+        args = CLIArgs(
+            self.task,
+            orig_flow_datastore=self._orig_flow_datastore,
+            spin_pathspec=self._spin_pathspec,
+            artifacts_module=self._artifacts_module,
+            persist=self._persist,
+            skip_decorators=self._skip_decorators,
+        )
         env = dict(os.environ)
 
         if self.task.clone_run_id:
@@ -1928,6 +2253,7 @@ class Worker(object):
         # by read_logline() below that relies on readline() not blocking
         # print('running', args)
         cmdline = args.get_args()
+        from_start(f"Command line: {' '.join(cmdline)}")
         debug.subcommand_exec(cmdline)
         return subprocess.Popen(
             cmdline,
@@ -2050,13 +2376,14 @@ class Worker(object):
            else:
                self.emit_log(b"Task failed.", self._stderr, system_msg=True)
        else:
-            num = self.task.results["_foreach_num_splits"]
-            if num:
-                self.task.log(
-                    "Foreach yields %d child steps." % num,
-                    system_msg=True,
-                    pid=self._proc.pid,
-                )
+            if not self._spin_pathspec:
+                num = self.task.results["_foreach_num_splits"]
+                if num:
+                    self.task.log(
+                        "Foreach yields %d child steps." % num,
+                        system_msg=True,
+                        pid=self._proc.pid,
+                    )
            self.task.log(
                "Task finished successfully.", system_msg=True, pid=self._proc.pid
            )
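For context, the new SpinRuntime resolves which finished task to spin from an optional pathspec before it builds the spin-step command line. The sketch below isolates that resolution logic for readability: resolve_spin_task is a hypothetical helper name (the real code lives inline in SpinRuntime.__init__ above), while Task and get_latest_task_pathspec are the APIs the diff itself imports; judging from its use above, get_latest_task_pathspec appears to return a Task handle despite its name.

# A minimal sketch, not the shipped implementation: mirrors the pathspec
# handling in SpinRuntime.__init__ above. resolve_spin_task is hypothetical.
def resolve_spin_task(flow_name, step_name, spin_pathspec=None):
    from metaflow import Task
    from metaflow.util import get_latest_task_pathspec

    if not spin_pathspec:
        # No pathspec given: fall back to the latest task recorded for the step.
        return get_latest_task_pathspec(flow_name, step_name)
    parts = spin_pathspec.split("/")
    if len(parts) == 4:
        # Full flow/run/step/task_id pathspec: resolve it directly,
        # skipping the namespace check as the diff does.
        return Task(spin_pathspec, _namespace_check=False)
    if len(parts) == 3:
        # Partial flow/run/step pathspec: look up the latest task in that run.
        _, run_id, _ = parts
        return get_latest_task_pathspec(flow_name, step_name, run_id=run_id)
    raise ValueError(
        "Expected flow/run/step or flow/run/step/task_id, got %r" % spin_pathspec
    )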
metaflow/task.py CHANGED
@@ -6,14 +6,15 @@ import os
 import time
 import traceback
 
-
 from types import MethodType, FunctionType
 
 from metaflow.sidecar import Message, MessageTypes
 from metaflow.datastore.exceptions import DataException
 
+from metaflow.plugins import METADATA_PROVIDERS
 from .metaflow_config import MAX_ATTEMPTS
 from .metadata_provider import MetaDatum
+from .metaflow_profile import from_start
 from .mflog import TASK_LOG_SOURCE
 from .datastore import Inputs, TaskDataStoreSet
 from .exception import (
@@ -49,6 +50,8 @@ class MetaflowTask(object):
         event_logger,
         monitor,
         ubf_context,
+        orig_flow_datastore=None,
+        spin_artifacts=None,
     ):
         self.flow = flow
         self.flow_datastore = flow_datastore
@@ -58,6 +61,8 @@ class MetaflowTask(object):
         self.event_logger = event_logger
         self.monitor = monitor
         self.ubf_context = ubf_context
+        self.orig_flow_datastore = orig_flow_datastore
+        self.spin_artifacts = spin_artifacts
 
    def _exec_step_function(self, step_function, orig_step_func, input_obj=None):
        wrappers_stack = []
@@ -150,6 +155,7 @@ class MetaflowTask(object):
        graph_node = self.flow._graph[orig_step_func.name]
        out_funcs = [getattr(self.flow, f) for f in graph_node.out_funcs]
        if out_funcs:
+            self.flow._transition = None
            if isinstance(fake_next_call_args, dict) and fake_next_call_args:
                # Not an empty dictionary -- we use this as arguments for the next
                # call
@@ -233,7 +239,6 @@ class MetaflowTask(object):
            lambda _, parameter_ds=parameter_ds: parameter_ds["_graph_info"],
        )
        all_vars.append("_graph_info")
-
        if passdown:
            self.flow._datastore.passdown_partial(parameter_ds, all_vars)
        return param_only_vars
@@ -261,6 +266,9 @@ class MetaflowTask(object):
                run_id,
                pathspecs=input_paths,
                prefetch_data_artifacts=prefetch_data_artifacts,
+                join_type=join_type,
+                orig_flow_datastore=self.orig_flow_datastore,
+                spin_artifacts=self.spin_artifacts,
            )
            ds_list = [ds for ds in datastore_set]
            if len(ds_list) != len(input_paths):
@@ -272,10 +280,27 @@ class MetaflowTask(object):
            # initialize directly in the single input case.
            ds_list = []
            for input_path in input_paths:
-                run_id, step_name, task_id = input_path.split("/")
+                parts = input_path.split("/")
+                if len(parts) == 3:
+                    run_id, step_name, task_id = parts
+                    attempt = None
+                else:
+                    run_id, step_name, task_id, attempt = parts
+                    attempt = int(attempt)
+
                ds_list.append(
-                    self.flow_datastore.get_task_datastore(run_id, step_name, task_id)
+                    self.flow_datastore.get_task_datastore(
+                        run_id,
+                        step_name,
+                        task_id,
+                        attempt=attempt,
+                        join_type=join_type,
+                        orig_flow_datastore=self.orig_flow_datastore,
+                        spin_artifacts=self.spin_artifacts,
+                    )
                )
+                from_start("MetaflowTask: got datastore for input path %s" % input_path)
+
        if not ds_list:
            # this guards against errors in input paths
            raise MetaflowDataMissing(
@@ -546,6 +571,8 @@ class MetaflowTask(object):
        split_index,
        retry_count,
        max_user_code_retries,
+        whitelist_decorators=None,
+        persist=True,
    ):
        if run_id and task_id:
            self.metadata.register_run_id(run_id)
@@ -604,7 +631,14 @@ class MetaflowTask(object):
 
        step_func = getattr(self.flow, step_name)
        decorators = step_func.decorators
-
+        if self.orig_flow_datastore:
+            # We filter only the whitelisted decorators in case of spin step.
+            decorators = (
+                []
+                if not whitelist_decorators
+                else [deco for deco in decorators if deco.name in whitelist_decorators]
+            )
+        from_start("MetaflowTask: decorators initialized")
        node = self.flow._graph[step_name]
        join_type = None
        if node.type == "join":
@@ -612,17 +646,20 @@ class MetaflowTask(object):
 
        # 1. initialize output datastore
        output = self.flow_datastore.get_task_datastore(
-            run_id, step_name, task_id, attempt=retry_count, mode="w"
+            run_id, step_name, task_id, attempt=retry_count, mode="w", persist=persist
        )
 
        output.init_task()
+        from_start("MetaflowTask: output datastore initialized")
 
        if input_paths:
            # 2. initialize input datastores
            inputs = self._init_data(run_id, join_type, input_paths)
+            from_start("MetaflowTask: input datastores initialized")
 
            # 3. initialize foreach state
            self._init_foreach(step_name, join_type, inputs, split_index)
+            from_start("MetaflowTask: foreach state initialized")
 
        # 4. initialize the iteration state
        is_recursive_step = (
@@ -681,7 +718,7 @@ class MetaflowTask(object):
                ),
            ]
        )
-
+        from_start("MetaflowTask: finished input processing")
        self.metadata.register_metadata(
            run_id,
            step_name,
@@ -735,8 +772,11 @@ class MetaflowTask(object):
            "project_flow_name": current.get("project_flow_name"),
            "trace_id": trace_id or None,
        }
+
+        from_start("MetaflowTask: task metadata initialized")
        start = time.time()
        self.metadata.start_task_heartbeat(self.flow.name, run_id, step_name, task_id)
+        from_start("MetaflowTask: heartbeat started")
        with self.monitor.measure("metaflow.task.duration"):
            try:
                with self.monitor.count("metaflow.task.start"):
@@ -756,7 +796,6 @@ class MetaflowTask(object):
                # should either be set prior to running the user code or listed in
                # FlowSpec._EPHEMERAL to allow for proper merging/importing of
                # user artifacts in the user's step code.
-
                if join_type:
                    # Join step:
 
@@ -815,11 +854,19 @@ class MetaflowTask(object):
                        "graph_info": self.flow._graph_info,
                    }
                )
+                from_start("MetaflowTask: before pre-step decorators")
                for deco in decorators:
+                    if deco.name == "card" and self.orig_flow_datastore:
+                        # if spin step and card decorator, pass spin metadata
+                        metadata = [m for m in METADATA_PROVIDERS if m.TYPE == "spin"][
+                            0
+                        ](self.environment, self.flow, self.event_logger, self.monitor)
+                    else:
+                        metadata = self.metadata
                    deco.task_pre_step(
                        step_name,
                        output,
-                        self.metadata,
+                        metadata,
                        run_id,
                        task_id,
                        self.flow,
@@ -845,12 +892,12 @@ class MetaflowTask(object):
                        max_user_code_retries,
                        self.ubf_context,
                    )
-
+                from_start("MetaflowTask: finished decorator processing")
                if join_type:
                    self._exec_step_function(step_func, orig_step_func, input_obj)
                else:
                    self._exec_step_function(step_func, orig_step_func)
-
+                from_start("MetaflowTask: step function executed")
                for deco in decorators:
                    deco.task_post_step(
                        step_name,
@@ -893,6 +940,7 @@ class MetaflowTask(object):
                    raise
 
            finally:
+                from_start("MetaflowTask: decorators finalized")
                if self.ubf_context == UBF_CONTROL:
                    self._finalize_control_task()
 
@@ -932,7 +980,7 @@ class MetaflowTask(object):
                )
 
                output.save_metadata({"task_end": {}})
-
+                from_start("MetaflowTask: output persisted")
                # this writes a success marker indicating that the
                # "transaction" is done
                output.done()
@@ -961,3 +1009,4 @@ class MetaflowTask(object):
                        name="duration",
                        payload={**task_payload, "msg": str(duration)},
                    )
+        from_start("MetaflowTask: task run completed")
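A recurring detail in the task.py changes: input paths may now carry an explicit attempt number as a fourth component (SpinRuntime formats them as run/step/task_id/attempt), while the classic three-part run/step/task_id form still works. The standalone sketch below mirrors the parsing that _init_data now performs inline; parse_input_path is a hypothetical helper name used only for illustration.

# Hypothetical helper mirroring _init_data's new path parsing; an omitted
# attempt means "let the datastore pick the attempt".
def parse_input_path(input_path):
    parts = input_path.split("/")
    if len(parts) == 3:
        run_id, step_name, task_id = parts
        attempt = None
    else:
        run_id, step_name, task_id, attempt = parts
        attempt = int(attempt)
    return run_id, step_name, task_id, attempt

assert parse_input_path("1760/start/42") == ("1760", "start", "42", None)
assert parse_input_path("1760/start/42/0") == ("1760", "start", "42", 0)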
metaflow/user_configs/config_parameters.py CHANGED
@@ -379,7 +379,7 @@ class DelayEvaluator(collections.abc.Mapping):
            to_eval_expr,
            self._globals or globals(),
            {
-                k: ConfigValue(v)
+                k: ConfigValue(v) if v is not None else None
                for k, v in flow_cls._flow_state.get(_FlowState.CONFIGS, {}).items()
            },
        )
@@ -507,9 +507,7 @@ class Config(Parameter, collections.abc.Mapping):
        self._delayed_evaluator = None
 
    def load_parameter(self, v):
-        if v is None:
-            return None
-        return ConfigValue(v)
+        return ConfigValue(v) if v is not None else None
 
    def _store_value(self, v: Any) -> None:
        self._computed_value = v
metaflow/user_decorators/mutable_flow.py CHANGED
@@ -114,7 +114,7 @@ class MutableFlow:
        for name, value in self._flow_cls._flow_state.get(
            _FlowState.CONFIGS, {}
        ).items():
-            r = name, ConfigValue(value)
+            r = name, ConfigValue(value) if value is not None else None
            debug.userconf_exec("Mutable flow yielding config: %s" % str(r))
            yield r
 
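The three config changes above apply the same guard: wrap a config's value in ConfigValue only when it is not None, and surface None unchanged. Below is a self-contained illustration of the pattern; the ConfigValue class here is a stand-in for Metaflow's read-only config mapping, not the real implementation, and load_parameter is reproduced only in spirit.

# Stand-in for metaflow's ConfigValue, used only to demonstrate the guard.
class ConfigValue(dict):
    pass

def load_parameter(v):
    # Mirrors Config.load_parameter after this diff: a None config passes
    # through as None instead of being wrapped (for a dict-backed wrapper
    # like this stand-in, ConfigValue(None) would raise a TypeError).
    return ConfigValue(v) if v is not None else None

assert load_parameter(None) is None
assert load_parameter({"alpha": 0.5})["alpha"] == 0.5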