ob-metaflow 2.15.13.1__py2.py3-none-any.whl → 2.19.7.1rc0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169)
  1. metaflow/__init__.py +10 -3
  2. metaflow/_vendor/imghdr/__init__.py +186 -0
  3. metaflow/_vendor/yaml/__init__.py +427 -0
  4. metaflow/_vendor/yaml/composer.py +139 -0
  5. metaflow/_vendor/yaml/constructor.py +748 -0
  6. metaflow/_vendor/yaml/cyaml.py +101 -0
  7. metaflow/_vendor/yaml/dumper.py +62 -0
  8. metaflow/_vendor/yaml/emitter.py +1137 -0
  9. metaflow/_vendor/yaml/error.py +75 -0
  10. metaflow/_vendor/yaml/events.py +86 -0
  11. metaflow/_vendor/yaml/loader.py +63 -0
  12. metaflow/_vendor/yaml/nodes.py +49 -0
  13. metaflow/_vendor/yaml/parser.py +589 -0
  14. metaflow/_vendor/yaml/reader.py +185 -0
  15. metaflow/_vendor/yaml/representer.py +389 -0
  16. metaflow/_vendor/yaml/resolver.py +227 -0
  17. metaflow/_vendor/yaml/scanner.py +1435 -0
  18. metaflow/_vendor/yaml/serializer.py +111 -0
  19. metaflow/_vendor/yaml/tokens.py +104 -0
  20. metaflow/cards.py +4 -0
  21. metaflow/cli.py +125 -21
  22. metaflow/cli_components/init_cmd.py +1 -0
  23. metaflow/cli_components/run_cmds.py +204 -40
  24. metaflow/cli_components/step_cmd.py +160 -4
  25. metaflow/client/__init__.py +1 -0
  26. metaflow/client/core.py +198 -130
  27. metaflow/client/filecache.py +59 -32
  28. metaflow/cmd/code/__init__.py +2 -1
  29. metaflow/cmd/develop/stub_generator.py +49 -18
  30. metaflow/cmd/develop/stubs.py +9 -27
  31. metaflow/cmd/make_wrapper.py +30 -0
  32. metaflow/datastore/__init__.py +1 -0
  33. metaflow/datastore/content_addressed_store.py +40 -9
  34. metaflow/datastore/datastore_set.py +10 -1
  35. metaflow/datastore/flow_datastore.py +124 -4
  36. metaflow/datastore/spin_datastore.py +91 -0
  37. metaflow/datastore/task_datastore.py +92 -6
  38. metaflow/debug.py +5 -0
  39. metaflow/decorators.py +331 -82
  40. metaflow/extension_support/__init__.py +414 -356
  41. metaflow/extension_support/_empty_file.py +2 -2
  42. metaflow/flowspec.py +322 -82
  43. metaflow/graph.py +178 -15
  44. metaflow/includefile.py +25 -3
  45. metaflow/lint.py +94 -3
  46. metaflow/meta_files.py +13 -0
  47. metaflow/metadata_provider/metadata.py +13 -2
  48. metaflow/metaflow_config.py +66 -4
  49. metaflow/metaflow_environment.py +91 -25
  50. metaflow/metaflow_profile.py +18 -0
  51. metaflow/metaflow_version.py +16 -1
  52. metaflow/package/__init__.py +673 -0
  53. metaflow/packaging_sys/__init__.py +880 -0
  54. metaflow/packaging_sys/backend.py +128 -0
  55. metaflow/packaging_sys/distribution_support.py +153 -0
  56. metaflow/packaging_sys/tar_backend.py +99 -0
  57. metaflow/packaging_sys/utils.py +54 -0
  58. metaflow/packaging_sys/v1.py +527 -0
  59. metaflow/parameters.py +6 -2
  60. metaflow/plugins/__init__.py +6 -0
  61. metaflow/plugins/airflow/airflow.py +11 -1
  62. metaflow/plugins/airflow/airflow_cli.py +16 -5
  63. metaflow/plugins/argo/argo_client.py +42 -20
  64. metaflow/plugins/argo/argo_events.py +6 -6
  65. metaflow/plugins/argo/argo_workflows.py +1023 -344
  66. metaflow/plugins/argo/argo_workflows_cli.py +396 -94
  67. metaflow/plugins/argo/argo_workflows_decorator.py +9 -0
  68. metaflow/plugins/argo/argo_workflows_deployer_objects.py +75 -49
  69. metaflow/plugins/argo/capture_error.py +5 -2
  70. metaflow/plugins/argo/conditional_input_paths.py +35 -0
  71. metaflow/plugins/argo/exit_hooks.py +209 -0
  72. metaflow/plugins/argo/param_val.py +19 -0
  73. metaflow/plugins/aws/aws_client.py +6 -0
  74. metaflow/plugins/aws/aws_utils.py +33 -1
  75. metaflow/plugins/aws/batch/batch.py +72 -5
  76. metaflow/plugins/aws/batch/batch_cli.py +24 -3
  77. metaflow/plugins/aws/batch/batch_decorator.py +57 -6
  78. metaflow/plugins/aws/step_functions/step_functions.py +28 -3
  79. metaflow/plugins/aws/step_functions/step_functions_cli.py +49 -4
  80. metaflow/plugins/aws/step_functions/step_functions_deployer.py +3 -0
  81. metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +30 -0
  82. metaflow/plugins/cards/card_cli.py +20 -1
  83. metaflow/plugins/cards/card_creator.py +24 -1
  84. metaflow/plugins/cards/card_datastore.py +21 -49
  85. metaflow/plugins/cards/card_decorator.py +58 -6
  86. metaflow/plugins/cards/card_modules/basic.py +38 -9
  87. metaflow/plugins/cards/card_modules/bundle.css +1 -1
  88. metaflow/plugins/cards/card_modules/chevron/renderer.py +1 -1
  89. metaflow/plugins/cards/card_modules/components.py +592 -3
  90. metaflow/plugins/cards/card_modules/convert_to_native_type.py +34 -5
  91. metaflow/plugins/cards/card_modules/json_viewer.py +232 -0
  92. metaflow/plugins/cards/card_modules/main.css +1 -0
  93. metaflow/plugins/cards/card_modules/main.js +56 -41
  94. metaflow/plugins/cards/card_modules/test_cards.py +22 -6
  95. metaflow/plugins/cards/component_serializer.py +1 -8
  96. metaflow/plugins/cards/metadata.py +22 -0
  97. metaflow/plugins/catch_decorator.py +9 -0
  98. metaflow/plugins/datastores/local_storage.py +12 -6
  99. metaflow/plugins/datastores/spin_storage.py +12 -0
  100. metaflow/plugins/datatools/s3/s3.py +49 -17
  101. metaflow/plugins/datatools/s3/s3op.py +113 -66
  102. metaflow/plugins/env_escape/client_modules.py +102 -72
  103. metaflow/plugins/events_decorator.py +127 -121
  104. metaflow/plugins/exit_hook/__init__.py +0 -0
  105. metaflow/plugins/exit_hook/exit_hook_decorator.py +46 -0
  106. metaflow/plugins/exit_hook/exit_hook_script.py +52 -0
  107. metaflow/plugins/kubernetes/kubernetes.py +12 -1
  108. metaflow/plugins/kubernetes/kubernetes_cli.py +11 -0
  109. metaflow/plugins/kubernetes/kubernetes_decorator.py +25 -6
  110. metaflow/plugins/kubernetes/kubernetes_job.py +12 -4
  111. metaflow/plugins/kubernetes/kubernetes_jobsets.py +31 -30
  112. metaflow/plugins/metadata_providers/local.py +76 -82
  113. metaflow/plugins/metadata_providers/service.py +13 -9
  114. metaflow/plugins/metadata_providers/spin.py +16 -0
  115. metaflow/plugins/package_cli.py +36 -24
  116. metaflow/plugins/parallel_decorator.py +11 -2
  117. metaflow/plugins/parsers.py +16 -0
  118. metaflow/plugins/pypi/bootstrap.py +7 -1
  119. metaflow/plugins/pypi/conda_decorator.py +41 -82
  120. metaflow/plugins/pypi/conda_environment.py +14 -6
  121. metaflow/plugins/pypi/micromamba.py +9 -1
  122. metaflow/plugins/pypi/pip.py +41 -5
  123. metaflow/plugins/pypi/pypi_decorator.py +4 -4
  124. metaflow/plugins/pypi/utils.py +22 -0
  125. metaflow/plugins/secrets/__init__.py +3 -0
  126. metaflow/plugins/secrets/secrets_decorator.py +14 -178
  127. metaflow/plugins/secrets/secrets_func.py +49 -0
  128. metaflow/plugins/secrets/secrets_spec.py +101 -0
  129. metaflow/plugins/secrets/utils.py +74 -0
  130. metaflow/plugins/test_unbounded_foreach_decorator.py +2 -2
  131. metaflow/plugins/timeout_decorator.py +0 -1
  132. metaflow/plugins/uv/bootstrap.py +29 -1
  133. metaflow/plugins/uv/uv_environment.py +5 -3
  134. metaflow/pylint_wrapper.py +5 -1
  135. metaflow/runner/click_api.py +79 -26
  136. metaflow/runner/deployer.py +208 -6
  137. metaflow/runner/deployer_impl.py +32 -12
  138. metaflow/runner/metaflow_runner.py +266 -33
  139. metaflow/runner/subprocess_manager.py +21 -1
  140. metaflow/runner/utils.py +27 -16
  141. metaflow/runtime.py +660 -66
  142. metaflow/task.py +255 -26
  143. metaflow/user_configs/config_options.py +33 -21
  144. metaflow/user_configs/config_parameters.py +220 -58
  145. metaflow/user_decorators/__init__.py +0 -0
  146. metaflow/user_decorators/common.py +144 -0
  147. metaflow/user_decorators/mutable_flow.py +512 -0
  148. metaflow/user_decorators/mutable_step.py +424 -0
  149. metaflow/user_decorators/user_flow_decorator.py +264 -0
  150. metaflow/user_decorators/user_step_decorator.py +749 -0
  151. metaflow/util.py +197 -7
  152. metaflow/vendor.py +23 -7
  153. metaflow/version.py +1 -1
  154. {ob_metaflow-2.15.13.1.data → ob_metaflow-2.19.7.1rc0.data}/data/share/metaflow/devtools/Makefile +13 -2
  155. {ob_metaflow-2.15.13.1.data → ob_metaflow-2.19.7.1rc0.data}/data/share/metaflow/devtools/Tiltfile +107 -7
  156. {ob_metaflow-2.15.13.1.data → ob_metaflow-2.19.7.1rc0.data}/data/share/metaflow/devtools/pick_services.sh +1 -0
  157. {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/METADATA +2 -3
  158. {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/RECORD +162 -121
  159. {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/WHEEL +1 -1
  160. metaflow/_vendor/v3_5/__init__.py +0 -1
  161. metaflow/_vendor/v3_5/importlib_metadata/__init__.py +0 -644
  162. metaflow/_vendor/v3_5/importlib_metadata/_compat.py +0 -152
  163. metaflow/_vendor/v3_5/zipp.py +0 -329
  164. metaflow/info_file.py +0 -25
  165. metaflow/package.py +0 -203
  166. metaflow/user_configs/config_decorators.py +0 -568
  167. {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/entry_points.txt +0 -0
  168. {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/licenses/LICENSE +0 -0
  169. {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/top_level.txt +0 -0
metaflow/runtime.py CHANGED
@@ -15,28 +15,40 @@ import tempfile
 import time
 import subprocess
 from datetime import datetime
+from enum import Enum
 from io import BytesIO
+from itertools import chain
 from functools import partial
 from concurrent import futures
 
+from typing import Dict, Tuple
 from metaflow.datastore.exceptions import DataException
 from contextlib import contextmanager
 
 from . import get_namespace
+from .client.filecache import FileCache, FileBlobCache, TaskMetadataCache
 from .metadata_provider import MetaDatum
-from .metaflow_config import MAX_ATTEMPTS, UI_URL
+from .metaflow_config import (
+    FEAT_ALWAYS_UPLOAD_CODE_PACKAGE,
+    MAX_ATTEMPTS,
+    UI_URL,
+    SPIN_ALLOWED_DECORATORS,
+    SPIN_DISALLOWED_DECORATORS,
+)
+from .metaflow_profile import from_start
+from .plugins import DATASTORES
 from .exception import (
     MetaflowException,
     MetaflowInternalError,
     METAFLOW_EXIT_DISALLOW_RETRY,
 )
 from . import procpoll
-from .datastore import TaskDataStoreSet
+from .datastore import FlowDataStore, TaskDataStoreSet
 from .debug import debug
 from .decorators import flow_decorators
-from .flowspec import _FlowState
+from .flowspec import FlowStateItems
 from .mflog import mflog, RUNTIME_LOG_SOURCE
-from .util import to_unicode, compress_list, unicode_type
+from .util import to_unicode, compress_list, unicode_type, get_latest_task_pathspec
 from .clone_util import clone_task_helper
 from .unbounded_foreach import (
     CONTROL_TASK_TAG,
@@ -59,6 +71,7 @@ PROGRESS_INTERVAL = 300 # s
 # leveraging the TaskDataStoreSet.
 PREFETCH_DATA_ARTIFACTS = [
     "_foreach_stack",
+    "_iteration_stack",
     "_task_ok",
     "_transition",
     "_control_mapper_tasks",
@@ -66,6 +79,14 @@ PREFETCH_DATA_ARTIFACTS = [
 ]
 RESUME_POLL_SECONDS = 60
 
+
+class LoopBehavior(Enum):
+    NONE = "none"
+    ENTERING = "entering"
+    EXITING = "exiting"
+    LOOPING = "looping"
+
+
 # Runtime must use logsource=RUNTIME_LOG_SOURCE for all loglines that it
 # formats according to mflog. See a comment in mflog.__init__
 mflog_msg = partial(mflog.decorate, RUNTIME_LOG_SOURCE)
@@ -73,6 +94,253 @@ mflog_msg = partial(mflog.decorate, RUNTIME_LOG_SOURCE)
 # TODO option: output dot graph periodically about execution
 
 
+class SpinRuntime(object):
+    def __init__(
+        self,
+        flow,
+        graph,
+        flow_datastore,
+        metadata,
+        environment,
+        package,
+        logger,
+        entrypoint,
+        event_logger,
+        monitor,
+        step_func,
+        step_name,
+        spin_pathspec,
+        skip_decorators=False,
+        artifacts_module=None,
+        persist=True,
+        max_log_size=MAX_LOG_SIZE,
+    ):
+        from metaflow import Task
+
+        self._flow = flow
+        self._graph = graph
+        self._flow_datastore = flow_datastore
+        self._metadata = metadata
+        self._environment = environment
+        self._package = package
+        self._logger = logger
+        self._entrypoint = entrypoint
+        self._event_logger = event_logger
+        self._monitor = monitor
+
+        self._step_func = step_func
+
+        # Determine if we have a complete pathspec or need to get the task
+        if spin_pathspec:
+            parts = spin_pathspec.split("/")
+            if len(parts) == 4:
+                # Complete pathspec: flow/run/step/task_id
+                try:
+                    # If user provides whole pathspec, we do not need to check namespace
+                    task = Task(spin_pathspec, _namespace_check=False)
+                except Exception:
+                    raise MetaflowException(
+                        f"Invalid pathspec: {spin_pathspec} for step: {step_name}"
+                    )
+            elif len(parts) == 3:
+                # Partial pathspec: flow/run/step - need to get the task
+                _, run_id, _ = parts
+                task = get_latest_task_pathspec(flow.name, step_name, run_id=run_id)
+                logger(
+                    f"To make spin even faster, provide complete pathspec with task_id: {task.pathspec}",
+                    system_msg=True,
+                )
+            else:
+                raise MetaflowException(
+                    f"Invalid pathspec format: {spin_pathspec}. Expected flow/run/step or flow/run/step/task_id"
+                )
+        else:
+            # No pathspec provided, get latest task for this step
+            task = get_latest_task_pathspec(flow.name, step_name)
+            logger(
+                f"To make spin even faster, provide complete pathspec {task.pathspec}",
+                system_msg=True,
+            )
+        from_start("SpinRuntime: after getting task")
+
+        # Get the original FlowDatastore so we can use it to access artifacts from the
+        # spun task
+        meta_dict = task.metadata_dict
+        ds_type = meta_dict["ds-type"]
+        ds_root = meta_dict["ds-root"]
+        orig_datastore_impl = [d for d in DATASTORES if d.TYPE == ds_type][0]
+        orig_datastore_impl.datastore_root = ds_root
+        spin_pathspec = task.pathspec
+        orig_flow_datastore = FlowDataStore(
+            flow.name,
+            environment=None,
+            storage_impl=orig_datastore_impl,
+            ds_root=ds_root,
+        )
+
+        self._filecache = FileCache()
+        orig_flow_datastore.set_metadata_cache(
+            TaskMetadataCache(self._filecache, ds_type, ds_root, flow.name)
+        )
+        orig_flow_datastore.ca_store.set_blob_cache(
+            FileBlobCache(
+                self._filecache, FileCache.flow_ds_id(ds_type, ds_root, flow.name)
+            )
+        )
+
+        self._orig_flow_datastore = orig_flow_datastore
+        self._spin_pathspec = spin_pathspec
+        self._persist = persist
+        self._spin_task = task
+        self._input_paths = None
+        self._split_index = None
+        self._whitelist_decorators = None
+        self._config_file_name = None
+        self._skip_decorators = skip_decorators
+        self._artifacts_module = artifacts_module
+        self._max_log_size = max_log_size
+        self._encoding = sys.stdout.encoding or "UTF-8"
+
+        # Create a new run_id for the spin task
+        self.run_id = self._metadata.new_run_id()
+        # Raise exception if we have a black listed decorator
+        for deco in self._step_func.decorators:
+            if deco.name in SPIN_DISALLOWED_DECORATORS:
+                raise MetaflowException(
+                    f"Spinning steps with @{deco.name} decorator is not supported."
+                )
+
+        for deco in self.whitelist_decorators:
+            deco.runtime_init(flow, graph, package, self.run_id)
+        from_start("SpinRuntime: after init decorators")
+
+    @property
+    def split_index(self):
+        """
+        Returns the split index, caching the result after the first access.
+        """
+        if self._split_index is None:
+            self._split_index = getattr(self._spin_task, "index", None)
+
+        return self._split_index
+
+    @property
+    def input_paths(self):
+        def _format_input_paths(task_pathspec, attempt):
+            _, run_id, step_name, task_id = task_pathspec.split("/")
+            return f"{run_id}/{step_name}/{task_id}/{attempt}"
+
+        if self._input_paths:
+            return self._input_paths
+
+        if self._step_func.name == "start":
+            from metaflow import Step
+
+            flow_name, run_id, _, _ = self._spin_pathspec.split("/")
+            task = Step(
+                f"{flow_name}/{run_id}/_parameters", _namespace_check=False
+            ).task
+            self._input_paths = [
+                _format_input_paths(task.pathspec, task.current_attempt)
+            ]
+        else:
+            parent_tasks = self._spin_task.parent_tasks
+            self._input_paths = [
+                _format_input_paths(t.pathspec, t.current_attempt) for t in parent_tasks
+            ]
+        return self._input_paths
+
+    @property
+    def whitelist_decorators(self):
+        if self._skip_decorators:
+            self._whitelist_decorators = []
+            return self._whitelist_decorators
+        if self._whitelist_decorators:
+            return self._whitelist_decorators
+        self._whitelist_decorators = [
+            deco
+            for deco in self._step_func.decorators
+            if any(deco.name.startswith(prefix) for prefix in SPIN_ALLOWED_DECORATORS)
+        ]
+        return self._whitelist_decorators
+
+    def _new_task(self, step, input_paths=None, **kwargs):
+        return Task(
+            flow_datastore=self._flow_datastore,
+            flow=self._flow,
+            step=step,
+            run_id=self.run_id,
+            metadata=self._metadata,
+            environment=self._environment,
+            entrypoint=self._entrypoint,
+            event_logger=self._event_logger,
+            monitor=self._monitor,
+            input_paths=input_paths,
+            decos=self.whitelist_decorators,
+            logger=self._logger,
+            split_index=self.split_index,
+            **kwargs,
+        )
+
+    def execute(self):
+        exception = None
+        with tempfile.NamedTemporaryFile(mode="w", encoding="utf-8") as config_file:
+            config_value = dump_config_values(self._flow)
+            if config_value:
+                json.dump(config_value, config_file)
+                config_file.flush()
+                self._config_file_name = config_file.name
+            else:
+                self._config_file_name = None
+            from_start("SpinRuntime: config values processed")
+            self.task = self._new_task(self._step_func.name, self.input_paths)
+            try:
+                self._launch_and_monitor_task()
+            except Exception as ex:
+                self._logger("Task failed.", system_msg=True, bad=True)
+                exception = ex
+                raise
+            finally:
+                for deco in self.whitelist_decorators:
+                    deco.runtime_finished(exception)
+
+    def _launch_and_monitor_task(self):
+        worker = Worker(
+            self.task,
+            self._max_log_size,
+            self._config_file_name,
+            orig_flow_datastore=self._orig_flow_datastore,
+            spin_pathspec=self._spin_pathspec,
+            artifacts_module=self._artifacts_module,
+            persist=self._persist,
+            skip_decorators=self._skip_decorators,
+        )
+        from_start("SpinRuntime: created worker")
+
+        poll = procpoll.make_poll()
+        fds = worker.fds()
+        for fd in fds:
+            poll.add(fd)
+
+        active_fds = set(fds)
+
+        while active_fds:
+            events = poll.poll(POLL_TIMEOUT)
+            for event in events:
+                if event.can_read:
+                    worker.read_logline(event.fd)
+                if event.is_terminated:
+                    poll.remove(event.fd)
+                    active_fds.remove(event.fd)
+        from_start("SpinRuntime: read loglines")
+        returncode = worker.terminate()
+        from_start("SpinRuntime: worker terminated")
+        if returncode != 0:
+            raise TaskFailed(self.task, f"Task failed with return code {returncode}")
+        else:
+            self._logger("Task finished successfully.", system_msg=True)
+
+
 class NativeRuntime(object):
     def __init__(
         self,
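Note: the pathspec handling in SpinRuntime above accepts either a full task pathspec or a flow/run/step prefix. A minimal sketch of the difference, using the public client API (the flow, run, and task ids are made up for illustration):

from metaflow import Task

# A full 4-part pathspec pins the exact task, so SpinRuntime can skip both
# the namespace check and the latest-task lookup (the fastest path):
task = Task("TrainFlow/1234/train/5678", _namespace_check=False)

# With only flow/run/step, the runtime must first resolve the newest task
# for that step -- which is why it logs a hint to pass the full form.
print(task.pathspec)                  # "TrainFlow/1234/train/5678"
print(task.metadata_dict["ds-type"])  # e.g. "s3"; used above to rebuild the
                                      # original FlowDataStore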
@@ -95,6 +363,7 @@ class NativeRuntime(object):
         max_num_splits=MAX_NUM_SPLITS,
         max_log_size=MAX_LOG_SIZE,
         resume_identifier=None,
+        skip_decorator_hooks=False,
     ):
         if run_id is None:
             self._run_id = metadata.new_run_id()
@@ -107,6 +376,7 @@ class NativeRuntime(object):
         self._flow_datastore = flow_datastore
         self._metadata = metadata
         self._environment = environment
+        self._package = package
         self._logger = logger
         self._max_workers = max_workers
         self._active_tasks = dict()  # Key: step name;
@@ -128,6 +398,7 @@ class NativeRuntime(object):
         self._ran_or_scheduled_task_index = set()
         self._reentrant = reentrant
         self._run_url = None
+        self._skip_decorator_hooks = skip_decorator_hooks
 
         # If steps_to_rerun is specified, we will not clone them in resume mode.
         self._steps_to_rerun = steps_to_rerun or {}
@@ -179,9 +450,10 @@ class NativeRuntime(object):
         # finished.
         self._control_num_splits = {}  # control_task -> num_splits mapping
 
-        for step in flow:
-            for deco in step.decorators:
-                deco.runtime_init(flow, graph, package, self._run_id)
+        if not self._skip_decorator_hooks:
+            for step in flow:
+                for deco in step.decorators:
+                    deco.runtime_init(flow, graph, package, self._run_id)
 
     def _new_task(self, step, input_paths=None, **kwargs):
         if input_paths is None:
@@ -192,7 +464,7 @@ class NativeRuntime(object):
         if step in self._steps_to_rerun:
             may_clone = False
 
-        if step == "_parameters":
+        if step == "_parameters" or self._skip_decorator_hooks:
             decos = []
         else:
             decos = getattr(self._flow, step).decorators
@@ -285,6 +557,7 @@ class NativeRuntime(object):
             pathspec_index,
             cloned_task_pathspec_index,
             finished_tuple,
+            iteration_tuple,
             ubf_context,
             generate_task_obj,
             verbose=False,
@@ -329,7 +602,7 @@ class NativeRuntime(object):
                     self._metadata,
                     origin_ds_set=self._origin_ds_set,
                 )
-            self._finished[(step_name, finished_tuple)] = task_pathspec
+            self._finished[(step_name, finished_tuple, iteration_tuple)] = task_pathspec
             self._is_cloned[task_pathspec] = True
         except Exception as e:
             self._logger(
@@ -410,6 +683,7 @@ class NativeRuntime(object):
             finished_tuple = tuple(
                 [s._replace(value=0) for s in task_ds.get("_foreach_stack", ())]
             )
+            iteration_tuple = tuple(task_ds.get("_iteration_stack", ()))
            cloned_task_pathspec_index = pathspec_index.split("/")[1]
            if task_ds.get("_control_task_is_mapper_zero", False):
                # Replace None with index 0 for control task as it is part of the
435
709
  pathspec_index,
436
710
  cloned_task_pathspec_index,
437
711
  finished_tuple,
712
+ iteration_tuple,
438
713
  is_ubf_mapper_task,
439
714
  ubf_context,
440
715
  )
@@ -449,6 +724,7 @@ class NativeRuntime(object):
                 pathspec_index,
                 cloned_task_pathspec_index,
                 finished_tuple,
+                iteration_tuple,
                 ubf_context=ubf_context,
                 generate_task_obj=generate_task_obj and (not is_ubf_mapper_task),
                 verbose=verbose,
@@ -459,6 +735,7 @@ class NativeRuntime(object):
                 pathspec_index,
                 cloned_task_pathspec_index,
                 finished_tuple,
+                iteration_tuple,
                 is_ubf_mapper_task,
                 ubf_context,
             ) in inputs
@@ -479,6 +756,7 @@ class NativeRuntime(object):
             self._queue_push("start", {"input_paths": [self._params_task.path]})
         else:
             self._queue_push("start", {})
+
         progress_tstamp = time.time()
         with tempfile.NamedTemporaryFile(mode="w", encoding="utf-8") as config_file:
             # Configurations are passed through a file to avoid overloading the
@@ -499,7 +777,74 @@ class NativeRuntime(object):
             ):
                 # 1. are any of the current workers finished?
                 if self._cloned_tasks:
-                    finished_tasks = self._cloned_tasks
+                    finished_tasks = []
+
+                    # For loops (right now just recursive steps), we need to find
+                    # the exact frontier because if we queue all "successors" to all
+                    # the finished iterations, we would incorrectly launch multiple
+                    # successors. We therefore have to strip out all non-last
+                    # iterations *per* foreach branch.
+                    idx_per_finished_id = (
+                        {}
+                    )  # type: Dict[Tuple[str, Tuple[int, ...], Tuple[int, Tuple[int, ...]]]]
+                    for task in self._cloned_tasks:
+                        step_name, foreach_stack, iteration_stack = task.finished_id
+                        existing_task_idx = idx_per_finished_id.get(
+                            (step_name, foreach_stack), None
+                        )
+                        if existing_task_idx is not None:
+                            len_diff = len(iteration_stack) - len(
+                                existing_task_idx[1]
+                            )
+                            # In this case, we need to keep only the latest iteration
+                            if (
+                                len_diff == 0
+                                and iteration_stack > existing_task_idx[1]
+                            ) or len_diff == -1:
+                                # We remove the one we currently have and replace
+                                # by this one. The second option means that we are
+                                # adding the finished iteration marker.
+                                existing_task = finished_tasks[existing_task_idx[0]]
+                                # These are the first two lines of _queue_tasks
+                                # We still consider the tasks finished so we need
+                                # to update state to be clean.
+                                self._finished[existing_task.finished_id] = (
+                                    existing_task.path
+                                )
+                                self._is_cloned[existing_task.path] = (
+                                    existing_task.is_cloned
+                                )
+
+                                finished_tasks[existing_task_idx[0]] = task
+                                idx_per_finished_id[(step_name, foreach_stack)] = (
+                                    existing_task_idx[0],
+                                    iteration_stack,
+                                )
+                            elif (
+                                len_diff == 0
+                                and iteration_stack < existing_task_idx[1]
+                            ) or len_diff == 1:
+                                # The second option is when we have already marked
+                                # the end of the iteration in self._finished and
+                                # are now seeing a previous iteration.
+                                # We just mark the task as finished but we don't
+                                # put it in the finished_tasks list to pass to
+                                # the _queue_tasks function
+                                self._finished[task.finished_id] = task.path
+                                self._is_cloned[task.path] = task.is_cloned
+                            else:
+                                raise MetaflowInternalError(
+                                    "Unexpected recursive cloned tasks -- "
+                                    "this is a bug, please report it."
+                                )
+                        else:
+                            # New entry
+                            finished_tasks.append(task)
+                            idx_per_finished_id[(step_name, foreach_stack)] = (
+                                len(finished_tasks) - 1,
+                                iteration_stack,
+                            )
+
                     # reset the list of cloned tasks and let poll_workers handle
                     # the remaining transition
                     self._cloned_tasks = []
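Note: the pruning above is easier to see outside diff form. A self-contained sketch of the same idea, with finished_id simplified to (step, foreach_stack, iteration_stack) tuples as in the Task change further down (step and pathspec values are made up):

# Cloned tasks for the same (step, foreach) branch, at different loop depths:
cloned = [
    ("a", (), (0,)),   # iteration 0 of recursive step "a"
    ("a", (), (1,)),   # iteration 1: supersedes iteration 0
    ("b", (), ()),     # ordinary step, no loop
]

frontier = {}
for step, foreach, itstack in cloned:
    key = (step, foreach)
    prev = frontier.get(key)
    # Keep the loop-exit marker (shorter stack) or the latest iteration
    # (same depth, higher index); everything else is only marked finished.
    if prev is None or len(itstack) < len(prev) or (
        len(itstack) == len(prev) and itstack > prev
    ):
        frontier[key] = itstack

print(frontier)  # {('a', ()): (1,), ('b', ()): ()}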
@@ -566,12 +911,14 @@ class NativeRuntime(object):
             raise
         finally:
             # on finish clean tasks
-            for step in self._flow:
-                for deco in step.decorators:
-                    deco.runtime_finished(exception)
+            if not self._skip_decorator_hooks:
+                for step in self._flow:
+                    for deco in step.decorators:
+                        deco.runtime_finished(exception)
+            self._run_exit_hooks()
 
         # assert that end was executed and it was successful
-        if ("end", ()) in self._finished:
+        if ("end", (), ()) in self._finished:
             if self._run_url:
                 self._logger(
                     "Done! See the run in the UI at %s" % self._run_url,
@@ -591,6 +938,51 @@ class NativeRuntime(object):
                 "The *end* step was not successful by the end of flow."
             )
 
+    def _run_exit_hooks(self):
+        try:
+            flow_decos = self._flow._flow_state[FlowStateItems.FLOW_DECORATORS]
+            exit_hook_decos = flow_decos.get("exit_hook", [])
+            if not exit_hook_decos:
+                return
+
+            successful = ("end", (), ()) in self._finished or self._clone_only
+            pathspec = f"{self._graph.name}/{self._run_id}"
+            flow_file = self._environment.get_environment_info()["script"]
+
+            def _call(fn_name):
+                try:
+                    result = (
+                        subprocess.check_output(
+                            args=[
+                                sys.executable,
+                                "-m",
+                                "metaflow.plugins.exit_hook.exit_hook_script",
+                                flow_file,
+                                fn_name,
+                                pathspec,
+                            ],
+                            env=os.environ,
+                        )
+                        .decode()
+                        .strip()
+                    )
+                    print(result)
+                except subprocess.CalledProcessError as e:
+                    print(f"[exit_hook] Hook '{fn_name}' failed with error: {e}")
+                except Exception as e:
+                    print(f"[exit_hook] Unexpected error in hook '{fn_name}': {e}")
+
+            # Call all exit hook functions regardless of individual failures
+            for fn_name in [
+                name
+                for deco in exit_hook_decos
+                for name in (deco.success_hooks if successful else deco.error_hooks)
+            ]:
+                _call(fn_name)
+
+        except Exception as ex:
+            pass  # do not fail due to exit hooks for whatever reason.
+
     def _killall(self):
         # If we are here, all children have received a signal and are shutting down.
         # We want to give them an opportunity to do so and then kill
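Note: _run_exit_hooks shells out to metaflow.plugins.exit_hook.exit_hook_script so that user hooks cannot crash the runtime. A sketch of the flow-side API this supports, assuming the @exit_hook decorator added in metaflow/plugins/exit_hook/exit_hook_decorator.py uses on_success/on_error keyword arguments (that spelling is an assumption; the runtime only relies on the decorator exposing success_hooks/error_hooks, as read above):

from metaflow import FlowSpec, step, exit_hook

def notify_success():
    # Runs in a separate subprocess after the run ends, so a crash here
    # cannot take down the runtime (see _call above).
    print("run succeeded")

def notify_failure():
    print("run failed")

@exit_hook(on_success=[notify_success], on_error=[notify_failure])  # assumed kwargs
class HelloFlow(FlowSpec):
    @step
    def start(self):
        self.next(self.end)

    @step
    def end(self):
        pass

if __name__ == "__main__":
    HelloFlow()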
@@ -621,30 +1013,70 @@ class NativeRuntime(object):
 
     # Given the current task information (task_index), the type of transition,
     # and the split index, return the new task index.
-    def _translate_index(self, task, next_step, type, split_index=None):
-
-        match = re.match(r"^(.+)\[(.*)\]$", task.task_index)
+    def _translate_index(
+        self, task, next_step, type, split_index=None, loop_mode=LoopBehavior.NONE
+    ):
+        match = re.match(r"^(.+)\[(.*)\]\[(.*)\]$", task.task_index)
+        old_match = re.match(r"^(.+)\[(.*)\]$", task.task_index)
         if match:
-            _, foreach_index = match.groups()
+            _, foreach_index, iteration_index = match.groups()
             # Convert foreach_index to a list of integers
             if len(foreach_index) > 0:
                 foreach_index = foreach_index.split(",")
             else:
                 foreach_index = []
+            # Ditto for iteration_index
+            if len(iteration_index) > 0:
+                iteration_index = iteration_index.split(",")
+            else:
+                iteration_index = []
+        elif old_match:
+            _, foreach_index = old_match.groups()
+            # Convert foreach_index to a list of integers
+            if len(foreach_index) > 0:
+                foreach_index = foreach_index.split(",")
+            else:
+                foreach_index = []
+            # Legacy case fallback. No iteration index exists for these runs.
+            iteration_index = []
         else:
             raise ValueError(
-                "Index not in the format of {run_id}/{step_name}[{foreach_index}]"
+                "Index not in the format of {run_id}/{step_name}[{foreach_index}][{iteration_index}]"
             )
+        if loop_mode == LoopBehavior.NONE:
+            # Check if we are entering a looping construct. Right now, only recursive
+            # steps are looping constructs
+            next_step_node = self._graph[next_step]
+            if (
+                next_step_node.type == "split-switch"
+                and next_step in next_step_node.out_funcs
+            ):
+                loop_mode = LoopBehavior.ENTERING
+
+        # Update iteration_index
+        if loop_mode == LoopBehavior.ENTERING:
+            # We are entering a loop, so we add a new iteration level
+            iteration_index.append("0")
+        elif loop_mode == LoopBehavior.EXITING:
+            iteration_index = iteration_index[:-1]
+        elif loop_mode == LoopBehavior.LOOPING:
+            if len(iteration_index) == 0:
+                raise MetaflowInternalError(
+                    "In looping mode but there is no iteration index"
+                )
+            iteration_index[-1] = str(int(iteration_index[-1]) + 1)
+        iteration_index = ",".join(iteration_index)
+
         if type == "linear":
-            return "%s[%s]" % (next_step, ",".join(foreach_index))
+            return "%s[%s][%s]" % (next_step, ",".join(foreach_index), iteration_index)
         elif type == "join":
             indices = []
             if len(foreach_index) > 0:
                 indices = foreach_index[:-1]
-            return "%s[%s]" % (next_step, ",".join(indices))
+            return "%s[%s][%s]" % (next_step, ",".join(indices), iteration_index)
         elif type == "split":
             foreach_index.append(str(split_index))
-            return "%s[%s]" % (next_step, ",".join(foreach_index))
+            return "%s[%s][%s]" % (next_step, ",".join(foreach_index), iteration_index)
 
     # Store the parameters needed for task creation, so that pushing on items
     # onto the run_queue is an inexpensive operation.
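Note: a worked example of the three-part index arithmetic above, as a simplified stand-in for _translate_index (not the actual method: it ignores the graph and takes the LoopBehavior transitions as plain strings):

import re

def translate_index(task_index, next_step, loop_mode="none"):
    # task_index looks like "step[foreach][iteration]", e.g. "train[0,2][1]"
    step, foreach, iteration = re.match(
        r"^(.+)\[(.*)\]\[(.*)\]$", task_index
    ).groups()
    it = iteration.split(",") if iteration else []
    if loop_mode == "entering":
        it.append("0")                   # new loop level starts at iteration 0
    elif loop_mode == "looping":
        it[-1] = str(int(it[-1]) + 1)    # same level, next iteration
    elif loop_mode == "exiting":
        it = it[:-1]                     # pop the finished loop level
    return "%s[%s][%s]" % (next_step, foreach, ",".join(it))

assert translate_index("a[0,2][]", "a", "entering") == "a[0,2][0]"
assert translate_index("a[0,2][0]", "a", "looping") == "a[0,2][1]"
assert translate_index("a[0,2][1]", "b", "exiting") == "b[0,2][]"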
@@ -728,17 +1160,19 @@ class NativeRuntime(object):
             # tasks is incorrect and contains the pathspec of the *cloned* run
             # but we don't use it for anything. We could look to clean it up though
             if not task.is_cloned:
-                _, foreach_stack = task.finished_id
+                _, foreach_stack, iteration_stack = task.finished_id
                 top = foreach_stack[-1]
                 bottom = list(foreach_stack[:-1])
                 for i in range(num_splits):
                     s = tuple(bottom + [top._replace(index=i)])
-                    self._finished[(task.step, s)] = mapper_tasks[i]
+                    self._finished[(task.step, s, iteration_stack)] = mapper_tasks[
+                        i
+                    ]
                     self._is_cloned[mapper_tasks[i]] = False
 
             # Find and check status of control task and retrieve its pathspec
             # for retrieving unbounded foreach cardinality.
-            _, foreach_stack = task.finished_id
+            _, foreach_stack, iteration_stack = task.finished_id
             top = foreach_stack[-1]
             bottom = list(foreach_stack[:-1])
             s = tuple(bottom + [top._replace(index=None)])
747
1181
  # it will have index=0 instead of index=None.
748
1182
  if task.results.get("_control_task_is_mapper_zero", False):
749
1183
  s = tuple(bottom + [top._replace(index=0)])
750
- control_path = self._finished.get((task.step, s))
1184
+ control_path = self._finished.get((task.step, s, iteration_stack))
751
1185
  if control_path:
752
1186
  # Control task was successful.
753
1187
  # Additionally check the state of (sibling) mapper tasks as well
@@ -756,7 +1190,9 @@ class NativeRuntime(object):
756
1190
  required_tasks = []
757
1191
  for i in range(num_splits):
758
1192
  s = tuple(bottom + [top._replace(index=i)])
759
- required_tasks.append(self._finished.get((task.step, s)))
1193
+ required_tasks.append(
1194
+ self._finished.get((task.step, s, iteration_stack))
1195
+ )
760
1196
 
761
1197
  if all(required_tasks):
762
1198
  index = self._translate_index(task, next_step, "join")
@@ -769,10 +1205,12 @@ class NativeRuntime(object):
         else:
             # matching_split is the split-parent of the finished task
             matching_split = self._graph[self._graph[next_step].split_parents[-1]]
-            _, foreach_stack = task.finished_id
-            index = ""
+            _, foreach_stack, iteration_stack = task.finished_id
+
+            direct_parents = set(self._graph[next_step].in_funcs)
+
+            # next step is a foreach join
             if matching_split.type == "foreach":
-                # next step is a foreach join
 
                 def siblings(foreach_stack):
                     top = foreach_stack[-1]
@@ -781,29 +1219,56 @@ class NativeRuntime(object):
                     yield tuple(bottom + [top._replace(index=index)])
 
                 # required tasks are all split-siblings of the finished task
-                required_tasks = [
-                    self._finished.get((task.step, s)) for s in siblings(foreach_stack)
-                ]
+                required_tasks = list(
+                    filter(
+                        lambda x: x is not None,
+                        [
+                            self._finished.get((p, s, iteration_stack))
+                            for p in direct_parents
+                            for s in siblings(foreach_stack)
+                        ],
+                    )
+                )
+                required_count = task.finished_id[1][-1].num_splits
                 join_type = "foreach"
                 index = self._translate_index(task, next_step, "join")
             else:
                 # next step is a split
-                # required tasks are all branches joined by the next step
-                required_tasks = [
-                    self._finished.get((step, foreach_stack))
-                    for step in self._graph[next_step].in_funcs
-                ]
+                required_tasks = list(
+                    filter(
+                        lambda x: x is not None,
+                        [
+                            self._finished.get((p, foreach_stack, iteration_stack))
+                            for p in direct_parents
+                        ],
+                    )
+                )
+
+                required_count = len(matching_split.out_funcs)
                 join_type = "linear"
                 index = self._translate_index(task, next_step, "linear")
-
-            if all(required_tasks):
-                # all tasks to be joined are ready. Schedule the next join step.
+            if len(required_tasks) == required_count:
+                # We have all the required previous tasks to schedule a join
                 self._queue_push(
                     next_step,
                     {"input_paths": required_tasks, "join_type": join_type},
                     index,
                 )
 
+    def _queue_task_switch(self, task, next_steps, is_recursive):
+        chosen_step = next_steps[0]
+
+        loop_mode = LoopBehavior.NONE
+        if is_recursive:
+            if chosen_step != task.step:
+                # We are exiting a loop
+                loop_mode = LoopBehavior.EXITING
+            else:
+                # We are staying in the loop
+                loop_mode = LoopBehavior.LOOPING
+        index = self._translate_index(task, chosen_step, "linear", None, loop_mode)
+        self._queue_push(chosen_step, {"input_paths": [task.path]}, index)
+
     def _queue_task_foreach(self, task, next_steps):
         # CHECK: this condition should be enforced by the linter but
         # let's assert that the assumption holds
@@ -880,7 +1345,39 @@ class NativeRuntime(object):
                 next_steps = []
                 foreach = None
             expected = self._graph[task.step].out_funcs
-            if next_steps != expected:
+
+            if self._graph[task.step].type == "split-switch":
+                is_recursive = task.step in self._graph[task.step].out_funcs
+                if len(next_steps) != 1:
+                    msg = (
+                        "Switch step *{step}* should transition to exactly "
+                        "one step at runtime, but got: {actual}"
+                    )
+                    raise MetaflowInternalError(
+                        msg.format(step=task.step, actual=", ".join(next_steps))
+                    )
+                if next_steps[0] not in expected:
+                    msg = (
+                        "Switch step *{step}* transitioned to unexpected "
+                        "step *{actual}*. Expected one of: {expected}"
+                    )
+                    raise MetaflowInternalError(
+                        msg.format(
+                            step=task.step,
+                            actual=next_steps[0],
+                            expected=", ".join(expected),
+                        )
+                    )
+                # When exiting a recursive loop, we mark that the loop itself has
+                # finished by adding a special entry in self._finished which has
+                # an iteration stack that is shorter (ie: we are out of the loop) so
+                # that we can then find it when looking at successor tasks to launch.
+                if is_recursive and next_steps[0] != task.step:
+                    step_name, finished_tuple, iteration_tuple = task.finished_id
+                    self._finished[
+                        (step_name, finished_tuple, iteration_tuple[:-1])
+                    ] = task.path
+            elif next_steps != expected:
                 msg = (
                     "Based on static analysis of the code, step *{step}* "
                     "was expected to transition to step(s) *{expected}*. "
@@ -904,6 +1401,9 @@ class NativeRuntime(object):
             elif foreach:
                 # Next step is a foreach child
                 self._queue_task_foreach(task, next_steps)
+            elif self._graph[task.step].type == "split-switch":
+                # Current step is switch - queue the chosen step
+                self._queue_task_switch(task, next_steps, is_recursive)
             else:
                 # Next steps are normal linear steps
                 for step in next_steps:
@@ -960,6 +1460,22 @@ class NativeRuntime(object):
             # Initialize the task (which can be expensive using remote datastores)
             # before launching the worker so that cost is amortized over time, instead
             # of doing it during _queue_push.
+            if (
+                FEAT_ALWAYS_UPLOAD_CODE_PACKAGE
+                and "METAFLOW_CODE_SHA" not in os.environ
+            ):
+                # We check if the code package is uploaded and, if so, we set the
+                # environment variables that will cause the metadata service to
+                # register the code package with the task created in _new_task below
+                code_sha = self._package.package_sha(timeout=0.01)
+                if code_sha:
+                    os.environ["METAFLOW_CODE_SHA"] = code_sha
+                    os.environ["METAFLOW_CODE_URL"] = self._package.package_url()
+                    os.environ["METAFLOW_CODE_DS"] = self._flow_datastore.TYPE
+                    os.environ["METAFLOW_CODE_METADATA"] = (
+                        self._package.package_metadata
+                    )
+
             task = self._new_task(step, **task_kwargs)
             self._launch_worker(task)
 
@@ -1428,13 +1944,15 @@ class Task(object):
     @property
     def finished_id(self):
        # note: id is not available before the task has finished.
-        # Index already identifies the task within the foreach,
-        # we will remove foreach value so that it is easier to
+        # Index already identifies the task within the foreach and loop.
+        # We will remove foreach value so that it is easier to
         # identify siblings within a foreach.
         foreach_stack_tuple = tuple(
             [s._replace(value=0) for s in self.results["_foreach_stack"]]
         )
-        return (self.step, foreach_stack_tuple)
+        # _iteration_stack requires a fallback, as it does not exist for runs before v2.17.4
+        iteration_stack_tuple = tuple(self.results.get("_iteration_stack", []))
+        return (self.step, foreach_stack_tuple, iteration_stack_tuple)
 
     @property
     def is_cloned(self):
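Note: the third element of finished_id keeps iterations of a recursive step from colliding in _finished, and explains the ("end", (), ()) checks earlier in this diff. Illustratively (pathspec values made up):

finished = {}
finished[("fit", (), (0,))] = "17/fit/4"   # iteration 0
finished[("fit", (), (1,))] = "17/fit/9"   # iteration 1: a distinct key now
finished[("fit", (), ())] = "17/fit/9"     # loop-exit marker added on leaving

# The end-of-run check uses the same 3-tuple shape:
assert ("end", (), ()) not in finished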
@@ -1508,9 +2026,29 @@ class CLIArgs(object):
     for step execution in StepDecorator.runtime_step_cli().
     """
 
-    def __init__(self, task):
+    def __init__(
+        self,
+        task,
+        orig_flow_datastore=None,
+        spin_pathspec=None,
+        artifacts_module=None,
+        persist=True,
+        skip_decorators=False,
+    ):
         self.task = task
+        if orig_flow_datastore is not None:
+            self.orig_flow_datastore = "%s@%s" % (
+                orig_flow_datastore.TYPE,
+                orig_flow_datastore.datastore_root,
+            )
+        else:
+            self.orig_flow_datastore = None
+        self.spin_pathspec = spin_pathspec
+        self.artifacts_module = artifacts_module
+        self.persist = persist
+        self.skip_decorators = skip_decorators
         self.entrypoint = list(task.entrypoint)
+        step_obj = getattr(self.task.flow, self.task.step)
         self.top_level_options = {
             "quiet": True,
             "metadata": self.task.metadata_type,
@@ -1522,8 +2060,12 @@ class CLIArgs(object):
             "datastore-root": self.task.datastore_sysroot,
             "with": [
                 deco.make_decorator_spec()
-                for deco in self.task.decos
-                if not deco.statically_defined
+                for deco in chain(
+                    self.task.decos,
+                    step_obj.wrappers,
+                    step_obj.config_decorators,
+                )
+                if not deco.statically_defined and deco.inserted_by is None
             ],
         }
 
@@ -1536,27 +2078,55 @@ class CLIArgs(object):
         # We also pass configuration options using the kv.<name> syntax which will cause
         # the configuration options to be loaded from the CONFIG file (or local-config-file
         # in the case of the local runtime)
-        configs = self.task.flow._flow_state.get(_FlowState.CONFIGS)
+        configs = self.task.flow._flow_state[FlowStateItems.CONFIGS]
         if configs:
             self.top_level_options["config-value"] = [
                 (k, ConfigInput.make_key_name(k)) for k in configs
             ]
 
+        if spin_pathspec:
+            self.spin_args()
+        else:
+            self.default_args()
+
+    def default_args(self):
         self.commands = ["step"]
         self.command_args = [self.task.step]
         self.command_options = {
-            "run-id": task.run_id,
-            "task-id": task.task_id,
-            "input-paths": compress_list(task.input_paths),
-            "split-index": task.split_index,
-            "retry-count": task.retries,
-            "max-user-code-retries": task.user_code_retries,
-            "tag": task.tags,
+            "run-id": self.task.run_id,
+            "task-id": self.task.task_id,
+            "input-paths": compress_list(self.task.input_paths),
+            "split-index": self.task.split_index,
+            "retry-count": self.task.retries,
+            "max-user-code-retries": self.task.user_code_retries,
+            "tag": self.task.tags,
             "namespace": get_namespace() or "",
-            "ubf-context": task.ubf_context,
+            "ubf-context": self.task.ubf_context,
         }
         self.env = {}
 
+    def spin_args(self):
+        self.commands = ["spin-step"]
+        self.command_args = [self.task.step]
+
+        self.command_options = {
+            "run-id": self.task.run_id,
+            "task-id": self.task.task_id,
+            "input-paths": compress_list(self.task.input_paths),
+            "split-index": self.task.split_index,
+            "retry-count": self.task.retries,
+            "max-user-code-retries": self.task.user_code_retries,
+            "namespace": get_namespace() or "",
+            "orig-flow-datastore": self.orig_flow_datastore,
+            "artifacts-module": self.artifacts_module,
+            "skip-decorators": self.skip_decorators,
+        }
+        if self.persist:
+            self.command_options["persist"] = True
+        else:
+            self.command_options["no-persist"] = True
+        self.env = {}
+
     def get_args(self):
         # TODO: Make one with dict_to_cli_options; see cli_args.py for more detail
         def _options(mapping):
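Note: both option dicts above are flattened into CLI flags by get_args (via a dict_to_cli_options-style conversion; see the TODO in the method). A simplified sketch of that mapping for the spin case:

def to_flags(options):
    # Simplified: the real conversion also handles list values (e.g. "tag")
    # and option quoting.
    out = []
    for key, val in options.items():
        if val is None or val is False:
            continue
        if val is True:
            out.append("--%s" % key)      # e.g. --persist / --no-persist
        else:
            out.extend(("--%s" % key, str(val)))
    return out

print(to_flags({"run-id": 18, "skip-decorators": False, "persist": True}))
# ['--run-id', '18', '--persist']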
@@ -1595,9 +2165,24 @@
 
 
 class Worker(object):
-    def __init__(self, task, max_logs_size, config_file_name):
+    def __init__(
+        self,
+        task,
+        max_logs_size,
+        config_file_name,
+        orig_flow_datastore=None,
+        spin_pathspec=None,
+        artifacts_module=None,
+        persist=True,
+        skip_decorators=False,
+    ):
         self.task = task
         self._config_file_name = config_file_name
+        self._orig_flow_datastore = orig_flow_datastore
+        self._spin_pathspec = spin_pathspec
+        self._artifacts_module = artifacts_module
+        self._skip_decorators = skip_decorators
+        self._persist = persist
         self._proc = self._launch()
 
         if task.retries > task.user_code_retries:
@@ -1629,7 +2214,14 @@ class Worker(object):
         # not it is properly shut down)
 
     def _launch(self):
-        args = CLIArgs(self.task)
+        args = CLIArgs(
+            self.task,
+            orig_flow_datastore=self._orig_flow_datastore,
+            spin_pathspec=self._spin_pathspec,
+            artifacts_module=self._artifacts_module,
+            persist=self._persist,
+            skip_decorators=self._skip_decorators,
+        )
         env = dict(os.environ)
 
         if self.task.clone_run_id:
@@ -1662,6 +2254,7 @@ class Worker(object):
         # by read_logline() below that relies on readline() not blocking
         # print('running', args)
         cmdline = args.get_args()
+        from_start(f"Command line: {' '.join(cmdline)}")
         debug.subcommand_exec(cmdline)
         return subprocess.Popen(
             cmdline,
@@ -1784,13 +2377,14 @@ class Worker(object):
             else:
                 self.emit_log(b"Task failed.", self._stderr, system_msg=True)
         else:
-            num = self.task.results["_foreach_num_splits"]
-            if num:
-                self.task.log(
-                    "Foreach yields %d child steps." % num,
-                    system_msg=True,
-                    pid=self._proc.pid,
-                )
+            if not self._spin_pathspec:
+                num = self.task.results["_foreach_num_splits"]
+                if num:
+                    self.task.log(
+                        "Foreach yields %d child steps." % num,
+                        system_msg=True,
+                        pid=self._proc.pid,
+                    )
             self.task.log(
                 "Task finished successfully.", system_msg=True, pid=self._proc.pid
             )