ob-metaflow 2.15.13.1__py2.py3-none-any.whl → 2.19.7.1rc0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (169) hide show
  1. metaflow/__init__.py +10 -3
  2. metaflow/_vendor/imghdr/__init__.py +186 -0
  3. metaflow/_vendor/yaml/__init__.py +427 -0
  4. metaflow/_vendor/yaml/composer.py +139 -0
  5. metaflow/_vendor/yaml/constructor.py +748 -0
  6. metaflow/_vendor/yaml/cyaml.py +101 -0
  7. metaflow/_vendor/yaml/dumper.py +62 -0
  8. metaflow/_vendor/yaml/emitter.py +1137 -0
  9. metaflow/_vendor/yaml/error.py +75 -0
  10. metaflow/_vendor/yaml/events.py +86 -0
  11. metaflow/_vendor/yaml/loader.py +63 -0
  12. metaflow/_vendor/yaml/nodes.py +49 -0
  13. metaflow/_vendor/yaml/parser.py +589 -0
  14. metaflow/_vendor/yaml/reader.py +185 -0
  15. metaflow/_vendor/yaml/representer.py +389 -0
  16. metaflow/_vendor/yaml/resolver.py +227 -0
  17. metaflow/_vendor/yaml/scanner.py +1435 -0
  18. metaflow/_vendor/yaml/serializer.py +111 -0
  19. metaflow/_vendor/yaml/tokens.py +104 -0
  20. metaflow/cards.py +4 -0
  21. metaflow/cli.py +125 -21
  22. metaflow/cli_components/init_cmd.py +1 -0
  23. metaflow/cli_components/run_cmds.py +204 -40
  24. metaflow/cli_components/step_cmd.py +160 -4
  25. metaflow/client/__init__.py +1 -0
  26. metaflow/client/core.py +198 -130
  27. metaflow/client/filecache.py +59 -32
  28. metaflow/cmd/code/__init__.py +2 -1
  29. metaflow/cmd/develop/stub_generator.py +49 -18
  30. metaflow/cmd/develop/stubs.py +9 -27
  31. metaflow/cmd/make_wrapper.py +30 -0
  32. metaflow/datastore/__init__.py +1 -0
  33. metaflow/datastore/content_addressed_store.py +40 -9
  34. metaflow/datastore/datastore_set.py +10 -1
  35. metaflow/datastore/flow_datastore.py +124 -4
  36. metaflow/datastore/spin_datastore.py +91 -0
  37. metaflow/datastore/task_datastore.py +92 -6
  38. metaflow/debug.py +5 -0
  39. metaflow/decorators.py +331 -82
  40. metaflow/extension_support/__init__.py +414 -356
  41. metaflow/extension_support/_empty_file.py +2 -2
  42. metaflow/flowspec.py +322 -82
  43. metaflow/graph.py +178 -15
  44. metaflow/includefile.py +25 -3
  45. metaflow/lint.py +94 -3
  46. metaflow/meta_files.py +13 -0
  47. metaflow/metadata_provider/metadata.py +13 -2
  48. metaflow/metaflow_config.py +66 -4
  49. metaflow/metaflow_environment.py +91 -25
  50. metaflow/metaflow_profile.py +18 -0
  51. metaflow/metaflow_version.py +16 -1
  52. metaflow/package/__init__.py +673 -0
  53. metaflow/packaging_sys/__init__.py +880 -0
  54. metaflow/packaging_sys/backend.py +128 -0
  55. metaflow/packaging_sys/distribution_support.py +153 -0
  56. metaflow/packaging_sys/tar_backend.py +99 -0
  57. metaflow/packaging_sys/utils.py +54 -0
  58. metaflow/packaging_sys/v1.py +527 -0
  59. metaflow/parameters.py +6 -2
  60. metaflow/plugins/__init__.py +6 -0
  61. metaflow/plugins/airflow/airflow.py +11 -1
  62. metaflow/plugins/airflow/airflow_cli.py +16 -5
  63. metaflow/plugins/argo/argo_client.py +42 -20
  64. metaflow/plugins/argo/argo_events.py +6 -6
  65. metaflow/plugins/argo/argo_workflows.py +1023 -344
  66. metaflow/plugins/argo/argo_workflows_cli.py +396 -94
  67. metaflow/plugins/argo/argo_workflows_decorator.py +9 -0
  68. metaflow/plugins/argo/argo_workflows_deployer_objects.py +75 -49
  69. metaflow/plugins/argo/capture_error.py +5 -2
  70. metaflow/plugins/argo/conditional_input_paths.py +35 -0
  71. metaflow/plugins/argo/exit_hooks.py +209 -0
  72. metaflow/plugins/argo/param_val.py +19 -0
  73. metaflow/plugins/aws/aws_client.py +6 -0
  74. metaflow/plugins/aws/aws_utils.py +33 -1
  75. metaflow/plugins/aws/batch/batch.py +72 -5
  76. metaflow/plugins/aws/batch/batch_cli.py +24 -3
  77. metaflow/plugins/aws/batch/batch_decorator.py +57 -6
  78. metaflow/plugins/aws/step_functions/step_functions.py +28 -3
  79. metaflow/plugins/aws/step_functions/step_functions_cli.py +49 -4
  80. metaflow/plugins/aws/step_functions/step_functions_deployer.py +3 -0
  81. metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +30 -0
  82. metaflow/plugins/cards/card_cli.py +20 -1
  83. metaflow/plugins/cards/card_creator.py +24 -1
  84. metaflow/plugins/cards/card_datastore.py +21 -49
  85. metaflow/plugins/cards/card_decorator.py +58 -6
  86. metaflow/plugins/cards/card_modules/basic.py +38 -9
  87. metaflow/plugins/cards/card_modules/bundle.css +1 -1
  88. metaflow/plugins/cards/card_modules/chevron/renderer.py +1 -1
  89. metaflow/plugins/cards/card_modules/components.py +592 -3
  90. metaflow/plugins/cards/card_modules/convert_to_native_type.py +34 -5
  91. metaflow/plugins/cards/card_modules/json_viewer.py +232 -0
  92. metaflow/plugins/cards/card_modules/main.css +1 -0
  93. metaflow/plugins/cards/card_modules/main.js +56 -41
  94. metaflow/plugins/cards/card_modules/test_cards.py +22 -6
  95. metaflow/plugins/cards/component_serializer.py +1 -8
  96. metaflow/plugins/cards/metadata.py +22 -0
  97. metaflow/plugins/catch_decorator.py +9 -0
  98. metaflow/plugins/datastores/local_storage.py +12 -6
  99. metaflow/plugins/datastores/spin_storage.py +12 -0
  100. metaflow/plugins/datatools/s3/s3.py +49 -17
  101. metaflow/plugins/datatools/s3/s3op.py +113 -66
  102. metaflow/plugins/env_escape/client_modules.py +102 -72
  103. metaflow/plugins/events_decorator.py +127 -121
  104. metaflow/plugins/exit_hook/__init__.py +0 -0
  105. metaflow/plugins/exit_hook/exit_hook_decorator.py +46 -0
  106. metaflow/plugins/exit_hook/exit_hook_script.py +52 -0
  107. metaflow/plugins/kubernetes/kubernetes.py +12 -1
  108. metaflow/plugins/kubernetes/kubernetes_cli.py +11 -0
  109. metaflow/plugins/kubernetes/kubernetes_decorator.py +25 -6
  110. metaflow/plugins/kubernetes/kubernetes_job.py +12 -4
  111. metaflow/plugins/kubernetes/kubernetes_jobsets.py +31 -30
  112. metaflow/plugins/metadata_providers/local.py +76 -82
  113. metaflow/plugins/metadata_providers/service.py +13 -9
  114. metaflow/plugins/metadata_providers/spin.py +16 -0
  115. metaflow/plugins/package_cli.py +36 -24
  116. metaflow/plugins/parallel_decorator.py +11 -2
  117. metaflow/plugins/parsers.py +16 -0
  118. metaflow/plugins/pypi/bootstrap.py +7 -1
  119. metaflow/plugins/pypi/conda_decorator.py +41 -82
  120. metaflow/plugins/pypi/conda_environment.py +14 -6
  121. metaflow/plugins/pypi/micromamba.py +9 -1
  122. metaflow/plugins/pypi/pip.py +41 -5
  123. metaflow/plugins/pypi/pypi_decorator.py +4 -4
  124. metaflow/plugins/pypi/utils.py +22 -0
  125. metaflow/plugins/secrets/__init__.py +3 -0
  126. metaflow/plugins/secrets/secrets_decorator.py +14 -178
  127. metaflow/plugins/secrets/secrets_func.py +49 -0
  128. metaflow/plugins/secrets/secrets_spec.py +101 -0
  129. metaflow/plugins/secrets/utils.py +74 -0
  130. metaflow/plugins/test_unbounded_foreach_decorator.py +2 -2
  131. metaflow/plugins/timeout_decorator.py +0 -1
  132. metaflow/plugins/uv/bootstrap.py +29 -1
  133. metaflow/plugins/uv/uv_environment.py +5 -3
  134. metaflow/pylint_wrapper.py +5 -1
  135. metaflow/runner/click_api.py +79 -26
  136. metaflow/runner/deployer.py +208 -6
  137. metaflow/runner/deployer_impl.py +32 -12
  138. metaflow/runner/metaflow_runner.py +266 -33
  139. metaflow/runner/subprocess_manager.py +21 -1
  140. metaflow/runner/utils.py +27 -16
  141. metaflow/runtime.py +660 -66
  142. metaflow/task.py +255 -26
  143. metaflow/user_configs/config_options.py +33 -21
  144. metaflow/user_configs/config_parameters.py +220 -58
  145. metaflow/user_decorators/__init__.py +0 -0
  146. metaflow/user_decorators/common.py +144 -0
  147. metaflow/user_decorators/mutable_flow.py +512 -0
  148. metaflow/user_decorators/mutable_step.py +424 -0
  149. metaflow/user_decorators/user_flow_decorator.py +264 -0
  150. metaflow/user_decorators/user_step_decorator.py +749 -0
  151. metaflow/util.py +197 -7
  152. metaflow/vendor.py +23 -7
  153. metaflow/version.py +1 -1
  154. {ob_metaflow-2.15.13.1.data → ob_metaflow-2.19.7.1rc0.data}/data/share/metaflow/devtools/Makefile +13 -2
  155. {ob_metaflow-2.15.13.1.data → ob_metaflow-2.19.7.1rc0.data}/data/share/metaflow/devtools/Tiltfile +107 -7
  156. {ob_metaflow-2.15.13.1.data → ob_metaflow-2.19.7.1rc0.data}/data/share/metaflow/devtools/pick_services.sh +1 -0
  157. {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/METADATA +2 -3
  158. {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/RECORD +162 -121
  159. {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/WHEEL +1 -1
  160. metaflow/_vendor/v3_5/__init__.py +0 -1
  161. metaflow/_vendor/v3_5/importlib_metadata/__init__.py +0 -644
  162. metaflow/_vendor/v3_5/importlib_metadata/_compat.py +0 -152
  163. metaflow/_vendor/v3_5/zipp.py +0 -329
  164. metaflow/info_file.py +0 -25
  165. metaflow/package.py +0 -203
  166. metaflow/user_configs/config_decorators.py +0 -568
  167. {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/entry_points.txt +0 -0
  168. {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/licenses/LICENSE +0 -0
  169. {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/top_level.txt +0 -0
metaflow/task.py CHANGED
@@ -11,8 +11,10 @@ from types import MethodType, FunctionType
11
11
  from metaflow.sidecar import Message, MessageTypes
12
12
  from metaflow.datastore.exceptions import DataException
13
13
 
14
+ from metaflow.plugins import METADATA_PROVIDERS
14
15
  from .metaflow_config import MAX_ATTEMPTS
15
16
  from .metadata_provider import MetaDatum
17
+ from .metaflow_profile import from_start
16
18
  from .mflog import TASK_LOG_SOURCE
17
19
  from .datastore import Inputs, TaskDataStoreSet
18
20
  from .exception import (
@@ -24,6 +26,7 @@ from .unbounded_foreach import UBF_CONTROL
24
26
  from .util import all_equal, get_username, resolve_identity, unicode_type
25
27
  from .clone_util import clone_task_helper
26
28
  from .metaflow_current import current
29
+ from metaflow.user_configs.config_parameters import ConfigValue
27
30
  from metaflow.system import _system_logger, _system_monitor
28
31
  from metaflow.tracing import get_trace_id
29
32
  from metaflow.tuple_util import ForeachFrame
@@ -47,6 +50,8 @@ class MetaflowTask(object):
47
50
  event_logger,
48
51
  monitor,
49
52
  ubf_context,
53
+ orig_flow_datastore=None,
54
+ spin_artifacts=None,
50
55
  ):
51
56
  self.flow = flow
52
57
  self.flow_datastore = flow_datastore
@@ -56,12 +61,126 @@ class MetaflowTask(object):
56
61
  self.event_logger = event_logger
57
62
  self.monitor = monitor
58
63
  self.ubf_context = ubf_context
64
+ self.orig_flow_datastore = orig_flow_datastore
65
+ self.spin_artifacts = spin_artifacts
66
+
67
def _exec_step_function(self, step_function, orig_step_func, input_obj=None):
    """Execute a step's code surrounded by its user step-decorator wrappers.

    Parameters
    ----------
    step_function : callable
        The (possibly decorated) step function to call. It is called with no
        argument, or with `input_obj` when `input_obj` is not None.
    orig_step_func : object
        The original step function; this method reads its `wrappers` and
        `name` attributes.
    input_obj : optional
        Object forwarded to the step (and to the wrappers' `pre_step`) when
        it is not None.

    Raises
    ------
    Exception
        Whatever exception is still pending after every `post_step` has run,
        or the third element of a 3-tuple returned by a `post_step`.
    RuntimeError
        If a `post_step` returns an unsupported value, if the value used to
        fake the `self.next` call is neither a bool nor a dictionary, or if a
        parallel foreach step is skipped without explicit `self.next`
        arguments.
    """
    wrappers_stack = []
    wrapped_func = None

    # Will be set to a non-falsy value if we need to fake calling `self.next`
    # (used when skipping the step). If a dictionary, it contains the
    # arguments to pass to `self.next`. If True, the usual arguments to
    # `self.next` for this step are used.
    fake_next_call_args = False
    raised_exception = None
    had_raised_exception = False

    # If we have wrappers w1, w2 and w3, we need to execute
    #  - w3_pre
    #  - w2_pre
    #  - w1_pre
    #  - step_function
    #  - w1_post
    #  - w2_post
    #  - w3_post
    # in that order. We do this by maintaining a stack of the wrappers whose
    # pre_step has run. If any pre_step returns a function, that function is
    # executed instead of the rest of the inside part (useful to create a
    # no-op step for example).
    for w in reversed(orig_step_func.wrappers):
        wrapped_func = w.pre_step(orig_step_func.name, self.flow, input_obj)
        wrappers_stack.append(w)
        if w.skip_step:
            # Nothing will run, so the call to next will have to be faked.
            fake_next_call_args = w.skip_step
            break
        if wrapped_func:
            # Nothing left to do: the wrapped function replaces the rest.
            break
        # Else, continue down the list of wrappers.

    try:
        # A truthy fake_next_call_args also means the step was skipped, so
        # nothing is executed in that case.
        if not fake_next_call_args:
            if input_obj is None:
                if wrapped_func:
                    fake_next_call_args = wrapped_func(self.flow)
                else:
                    step_function()
            else:
                if wrapped_func:
                    fake_next_call_args = wrapped_func(self.flow, input_obj)
                else:
                    step_function(input_obj)
    except Exception as ex:
        raised_exception = ex
        had_raised_exception = True

    # Back out of the stack of wrappers.
    for w in reversed(wrappers_stack):
        try:
            r = w.post_step(orig_step_func.name, self.flow, raised_exception)
        except Exception as ex:
            r = ex
        if r is None:
            raised_exception = None
        elif isinstance(r, Exception):
            raised_exception = r
        elif isinstance(r, tuple):
            if len(r) == 2:
                raised_exception, fake_next_call_args = r
            else:
                # The last argument is an exception to be re-raised. Used in
                # user_step_decorator's post_step.
                raise r[2]
        else:
            # The value is wrapped in a 1-tuple so that formatting can never
            # fail on unusual values.
            raise RuntimeError(
                "Invalid return value from a UserStepDecorator. Expected an "
                "exception or an exception and arguments for self.next, got: %s"
                % (r,)
            )
    if raised_exception:
        # We have an exception that we need to propagate.
        raise raised_exception

    if fake_next_call_args or had_raised_exception:
        # We want to override the next call or we caught an exception (in
        # which case the regular step code didn't call self.next). The
        # transition variables must then be set properly, so we call the next
        # function as needed. This also covers a gobbled exception.
        graph_node = self.flow._graph[orig_step_func.name]
        out_funcs = [getattr(self.flow, f) for f in graph_node.out_funcs]
        if out_funcs:
            self.flow._transition = None
            if isinstance(fake_next_call_args, dict) and fake_next_call_args:
                # Not an empty dictionary -- use it as the arguments for the
                # next call.
                self.flow.next(*out_funcs, **fake_next_call_args)
            elif (
                fake_next_call_args == True  # noqa: E712 -- equality kept as in the original
                or fake_next_call_args == {}
                or had_raised_exception
            ):
                # The arguments must be derived from the graph. This is not
                # possible in the case where there was a num_parallel.
                if graph_node.parallel_foreach:
                    raise RuntimeError(
                        "Skipping a parallel foreach step without providing "
                        "the arguments to the self.next call is not supported. "
                    )
                if graph_node.foreach_param:
                    self.flow.next(*out_funcs, foreach=graph_node.foreach_param)
                else:
                    self.flow.next(*out_funcs)
            else:
                # A 1-tuple is used so that a tuple value is reported in the
                # message instead of triggering a TypeError while formatting.
                raise RuntimeError(
                    "Invalid value passed to self.next; expected "
                    "bool or a dictionary; got: %s" % (fake_next_call_args,)
                )
65
184
 
66
185
  def _init_parameters(self, parameter_ds, passdown=True):
67
186
  cls = self.flow.__class__
@@ -120,7 +239,6 @@ class MetaflowTask(object):
120
239
  lambda _, parameter_ds=parameter_ds: parameter_ds["_graph_info"],
121
240
  )
122
241
  all_vars.append("_graph_info")
123
-
124
242
  if passdown:
125
243
  self.flow._datastore.passdown_partial(parameter_ds, all_vars)
126
244
  return param_only_vars
@@ -136,6 +254,7 @@ class MetaflowTask(object):
136
254
  # Prefetch 'foreach' related artifacts to improve time taken by
137
255
  # _init_foreach.
138
256
  prefetch_data_artifacts = [
257
+ "_iteration_stack",
139
258
  "_foreach_stack",
140
259
  "_foreach_num_splits",
141
260
  "_foreach_var",
@@ -147,6 +266,9 @@ class MetaflowTask(object):
147
266
  run_id,
148
267
  pathspecs=input_paths,
149
268
  prefetch_data_artifacts=prefetch_data_artifacts,
269
+ join_type=join_type,
270
+ orig_flow_datastore=self.orig_flow_datastore,
271
+ spin_artifacts=self.spin_artifacts,
150
272
  )
151
273
  ds_list = [ds for ds in datastore_set]
152
274
  if len(ds_list) != len(input_paths):
@@ -158,10 +280,27 @@ class MetaflowTask(object):
158
280
  # initialize directly in the single input case.
159
281
  ds_list = []
160
282
  for input_path in input_paths:
161
- run_id, step_name, task_id = input_path.split("/")
283
+ parts = input_path.split("/")
284
+ if len(parts) == 3:
285
+ run_id, step_name, task_id = parts
286
+ attempt = None
287
+ else:
288
+ run_id, step_name, task_id, attempt = parts
289
+ attempt = int(attempt)
290
+
162
291
  ds_list.append(
163
- self.flow_datastore.get_task_datastore(run_id, step_name, task_id)
292
+ self.flow_datastore.get_task_datastore(
293
+ run_id,
294
+ step_name,
295
+ task_id,
296
+ attempt=attempt,
297
+ join_type=join_type,
298
+ orig_flow_datastore=self.orig_flow_datastore,
299
+ spin_artifacts=self.spin_artifacts,
300
+ )
164
301
  )
302
+ from_start("MetaflowTask: got datastore for input path %s" % input_path)
303
+
165
304
  if not ds_list:
166
305
  # this guards against errors in input paths
167
306
  raise MetaflowDataMissing(
@@ -272,6 +411,56 @@ class MetaflowTask(object):
272
411
  elif "_foreach_stack" in inputs[0]:
273
412
  self.flow._foreach_stack = inputs[0]["_foreach_stack"]
274
413
 
414
def _init_iteration(self, step_name, inputs, is_recursive_step):
    """Set `self.flow._iteration_stack` for the step about to run.

    The iteration "stack" tracks the iteration count for each loop level.
    At this time only one kind of loop is handled here, a recursive step, so
    the stack is either empty (no loop) or holds a single element.

    Rules:
      1) Recursive step: we are entering the loop for the first time or
         continuing it. A single input is required.
         1a) If the input's `_iteration_stack` has an element, we are
             looping, so the last count is incremented.
         1b) If it is missing or empty, this is the first entry into the
             loop, so the count is set to 0.
      2) Non-recursive step: the stack is simply reset to []. This will have
         to be revisited if/when more complex loop structures are supported.

    `_iteration_stack` is always set to *something* so that, just like
    `_foreach_stack`, it does not get clobbered by merge_artifacts.

    Parameters
    ----------
    step_name : str
        Name of the step; only used in the error message.
    inputs : sequence
        Input objects supporting `in` and item access for
        "_iteration_stack".
    is_recursive_step : bool
        Whether the step is a recursive step.

    Raises
    ------
    MetaflowInternalError
        If the step is recursive and does not have exactly one input.
    """
    if not is_recursive_step:
        # Case 2)
        self.flow._iteration_stack = []
        return

    # Case 1)
    if len(inputs) != 1:
        raise MetaflowInternalError(
            "Step *%s* is a recursive step but got multiple inputs." % step_name
        )
    inp = inputs[0]
    previous = inp["_iteration_stack"] if "_iteration_stack" in inp else None
    if not previous:
        # Case 1b)
        self.flow._iteration_stack = [0]
    else:
        # Case 1a) -- work on a copy so the object held by the input is never
        # modified in place.
        stack = list(previous)
        stack[-1] += 1
        self.flow._iteration_stack = stack
463
+
275
464
  def _clone_flow(self, datastore):
276
465
  x = self.flow.__class__(use_cli=False)
277
466
  x._set_datastore(datastore)
@@ -382,6 +571,8 @@ class MetaflowTask(object):
382
571
  split_index,
383
572
  retry_count,
384
573
  max_user_code_retries,
574
+ whitelist_decorators=None,
575
+ persist=True,
385
576
  ):
386
577
  if run_id and task_id:
387
578
  self.metadata.register_run_id(run_id)
@@ -440,7 +631,14 @@ class MetaflowTask(object):
440
631
 
441
632
  step_func = getattr(self.flow, step_name)
442
633
  decorators = step_func.decorators
443
-
634
+ if self.orig_flow_datastore:
635
+ # We filter only the whitelisted decorators in case of spin step.
636
+ decorators = (
637
+ []
638
+ if not whitelist_decorators
639
+ else [deco for deco in decorators if deco.name in whitelist_decorators]
640
+ )
641
+ from_start("MetaflowTask: decorators initialized")
444
642
  node = self.flow._graph[step_name]
445
643
  join_type = None
446
644
  if node.type == "join":
@@ -448,17 +646,26 @@ class MetaflowTask(object):
448
646
 
449
647
  # 1. initialize output datastore
450
648
  output = self.flow_datastore.get_task_datastore(
451
- run_id, step_name, task_id, attempt=retry_count, mode="w"
649
+ run_id, step_name, task_id, attempt=retry_count, mode="w", persist=persist
452
650
  )
453
651
 
454
652
  output.init_task()
653
+ from_start("MetaflowTask: output datastore initialized")
455
654
 
456
655
  if input_paths:
457
656
  # 2. initialize input datastores
458
657
  inputs = self._init_data(run_id, join_type, input_paths)
658
+ from_start("MetaflowTask: input datastores initialized")
459
659
 
460
660
  # 3. initialize foreach state
461
661
  self._init_foreach(step_name, join_type, inputs, split_index)
662
+ from_start("MetaflowTask: foreach state initialized")
663
+
664
+ # 4. initialize the iteration state
665
+ is_recursive_step = (
666
+ node.type == "split-switch" and step_name in node.out_funcs
667
+ )
668
+ self._init_iteration(step_name, inputs, is_recursive_step)
462
669
 
463
670
  # Add foreach stack to metadata of the task
464
671
 
@@ -511,7 +718,7 @@ class MetaflowTask(object):
511
718
  ),
512
719
  ]
513
720
  )
514
-
721
+ from_start("MetaflowTask: finished input processing")
515
722
  self.metadata.register_metadata(
516
723
  run_id,
517
724
  step_name,
@@ -538,6 +745,9 @@ class MetaflowTask(object):
538
745
  output.save_metadata(
539
746
  {
540
747
  "task_begin": {
748
+ "code_package_metadata": os.environ.get(
749
+ "METAFLOW_CODE_METADATA", ""
750
+ ),
541
751
  "code_package_sha": os.environ.get("METAFLOW_CODE_SHA"),
542
752
  "code_package_ds": os.environ.get("METAFLOW_CODE_DS"),
543
753
  "code_package_url": os.environ.get("METAFLOW_CODE_URL"),
@@ -562,8 +772,11 @@ class MetaflowTask(object):
562
772
  "project_flow_name": current.get("project_flow_name"),
563
773
  "trace_id": trace_id or None,
564
774
  }
775
+
776
+ from_start("MetaflowTask: task metadata initialized")
565
777
  start = time.time()
566
778
  self.metadata.start_task_heartbeat(self.flow.name, run_id, step_name, task_id)
779
+ from_start("MetaflowTask: heartbeat started")
567
780
  with self.monitor.measure("metaflow.task.duration"):
568
781
  try:
569
782
  with self.monitor.count("metaflow.task.start"):
@@ -583,18 +796,23 @@ class MetaflowTask(object):
583
796
  # should either be set prior to running the user code or listed in
584
797
  # FlowSpec._EPHEMERAL to allow for proper merging/importing of
585
798
  # user artifacts in the user's step code.
586
-
587
799
  if join_type:
588
800
  # Join step:
589
801
 
590
- # Ensure that we have the right number of inputs. The
591
- # foreach case is checked above.
592
- if join_type != "foreach" and len(inputs) != len(node.in_funcs):
593
- raise MetaflowDataMissing(
594
- "Join *%s* expected %d "
595
- "inputs but only %d inputs "
596
- "were found" % (step_name, len(node.in_funcs), len(inputs))
597
- )
802
+ # Ensure that we have the right number of inputs.
803
+ if join_type != "foreach":
804
+ # Find the corresponding split node from the graph.
805
+ split_node = self.flow._graph[node.split_parents[-1]]
806
+ # The number of expected inputs is the number of branches
807
+ # from that split -- we can't use in_funcs because there may
808
+ # be more due to split-switch branches that all converge here.
809
+ expected_inputs = len(split_node.out_funcs)
810
+
811
+ if len(inputs) != expected_inputs:
812
+ raise MetaflowDataMissing(
813
+ "Join *%s* expected %d inputs but only %d inputs "
814
+ "were found" % (step_name, expected_inputs, len(inputs))
815
+ )
598
816
 
599
817
  # Multiple input contexts are passed in as an argument
600
818
  # to the step function.
@@ -636,11 +854,19 @@ class MetaflowTask(object):
636
854
  "graph_info": self.flow._graph_info,
637
855
  }
638
856
  )
857
+ from_start("MetaflowTask: before pre-step decorators")
639
858
  for deco in decorators:
859
+ if deco.name == "card" and self.orig_flow_datastore:
860
+ # if spin step and card decorator, pass spin metadata
861
+ metadata = [m for m in METADATA_PROVIDERS if m.TYPE == "spin"][
862
+ 0
863
+ ](self.environment, self.flow, self.event_logger, self.monitor)
864
+ else:
865
+ metadata = self.metadata
640
866
  deco.task_pre_step(
641
867
  step_name,
642
868
  output,
643
- self.metadata,
869
+ metadata,
644
870
  run_id,
645
871
  task_id,
646
872
  self.flow,
@@ -651,6 +877,7 @@ class MetaflowTask(object):
651
877
  inputs,
652
878
  )
653
879
 
880
+ orig_step_func = step_func
654
881
  for deco in decorators:
655
882
  # decorators can actually decorate the step function,
656
883
  # or they can replace it altogether. This functionality
@@ -665,12 +892,12 @@ class MetaflowTask(object):
665
892
  max_user_code_retries,
666
893
  self.ubf_context,
667
894
  )
668
-
895
+ from_start("MetaflowTask: finished decorator processing")
669
896
  if join_type:
670
- self._exec_step_function(step_func, input_obj)
897
+ self._exec_step_function(step_func, orig_step_func, input_obj)
671
898
  else:
672
- self._exec_step_function(step_func)
673
-
899
+ self._exec_step_function(step_func, orig_step_func)
900
+ from_start("MetaflowTask: step function executed")
674
901
  for deco in decorators:
675
902
  deco.task_post_step(
676
903
  step_name,
@@ -713,6 +940,7 @@ class MetaflowTask(object):
713
940
  raise
714
941
 
715
942
  finally:
943
+ from_start("MetaflowTask: decorators finalized")
716
944
  if self.ubf_context == UBF_CONTROL:
717
945
  self._finalize_control_task()
718
946
 
@@ -752,7 +980,7 @@ class MetaflowTask(object):
752
980
  )
753
981
 
754
982
  output.save_metadata({"task_end": {}})
755
-
983
+ from_start("MetaflowTask: output persisted")
756
984
  # this writes a success marker indicating that the
757
985
  # "transaction" is done
758
986
  output.done()
@@ -781,3 +1009,4 @@ class MetaflowTask(object):
781
1009
  name="duration",
782
1010
  payload={**task_payload, "msg": str(duration)},
783
1011
  )
1012
+ from_start("MetaflowTask: task run completed")
@@ -7,8 +7,9 @@ from typing import Any, Callable, Dict, List, Optional, Tuple, Union
7
7
  from metaflow._vendor import click
8
8
  from metaflow.debug import debug
9
9
 
10
- from .config_parameters import CONFIG_FILE, ConfigValue
10
+ from .config_parameters import ConfigValue
11
11
  from ..exception import MetaflowException, MetaflowInternalError
12
+ from ..packaging_sys import MetaflowCodeContent
12
13
  from ..parameters import DeployTimeField, ParameterContext, current_flow
13
14
  from ..util import get_username
14
15
 
@@ -24,12 +25,16 @@ _CONVERTED_DEFAULT_NO_FILE = _CONVERTED_DEFAULT + _NO_FILE
24
25
 
25
26
def _load_config_values(info_file: Optional[str] = None) -> Optional[Dict[Any, Any]]:
    """Return the "user_configs" mapping from the stored configuration.

    When `info_file` is None, the configuration comes from
    `MetaflowCodeContent.get_config()`; otherwise it is read as JSON from
    the file at `info_file`.

    Returns None when the file cannot be opened (IOError) or when the loaded
    configuration is empty/falsy; otherwise returns its "user_configs" entry,
    defaulting to an empty dict when that key is absent.
    """
    if info_file is not None:
        try:
            with open(info_file, encoding="utf-8") as handle:
                loaded = json.load(handle)
        except IOError:
            return None
    else:
        loaded = MetaflowCodeContent.get_config()

    if not loaded:
        return None
    return loaded.get("user_configs", {})
33
38
 
34
39
 
35
40
  class ConvertPath(click.Path):
@@ -181,7 +186,7 @@ class ConfigInput:
181
186
  click_obj: Optional[Any] = None,
182
187
  ):
183
188
  from ..cli import echo_always, echo_dev_null # Prevent circular import
184
- from ..flowspec import _FlowState # Prevent circular import
189
+ from ..flowspec import FlowStateItems # Prevent circular import
185
190
 
186
191
  flow_cls = getattr(current_flow, "flow_cls", None)
187
192
  if flow_cls is None:
@@ -221,13 +226,13 @@ class ConfigInput:
221
226
  if param_name == "config_value":
222
227
  self._value_values = {
223
228
  k.lower(): v
224
- for k, v in param_value
229
+ for k, v in param_value.items()
225
230
  if v is not None and not v.startswith(_CONVERTED_DEFAULT)
226
231
  }
227
232
  else:
228
233
  self._path_values = {
229
234
  k.lower(): v
230
- for k, v in param_value
235
+ for k, v in param_value.items()
231
236
  if v is not None and not v.startswith(_CONVERTED_DEFAULT)
232
237
  }
233
238
  if do_return:
@@ -255,7 +260,6 @@ class ConfigInput:
255
260
  for k in all_keys
256
261
  )
257
262
 
258
- flow_cls._flow_state[_FlowState.CONFIGS] = {}
259
263
  to_return = {}
260
264
 
261
265
  if not has_all_kv:
@@ -327,14 +331,14 @@ class ConfigInput:
327
331
  if val is None:
328
332
  missing_configs.add(name)
329
333
  to_return[name] = None
330
- flow_cls._flow_state[_FlowState.CONFIGS][name] = None
331
- continue
332
- if val.startswith(_CONVERTED_DEFAULT_NO_FILE):
333
- no_default_file.append(name)
334
+ flow_cls._flow_state.self_data[FlowStateItems.CONFIGS][name] = None
334
335
  continue
335
336
  if val.startswith(_CONVERTED_NO_FILE):
336
337
  no_file.append(name)
337
338
  continue
339
+ if val.startswith(_CONVERTED_DEFAULT_NO_FILE):
340
+ no_default_file.append(name)
341
+ continue
338
342
 
339
343
  val = val[len(_CONVERT_PREFIX) :] # Remove the _CONVERT_PREFIX
340
344
  if val.startswith(_DEFAULT_PREFIX): # Remove the _DEFAULT_PREFIX if needed
@@ -351,8 +355,12 @@ class ConfigInput:
351
355
  click_obj.delayed_config_exception = exc
352
356
  return None
353
357
  raise exc from e
354
- flow_cls._flow_state[_FlowState.CONFIGS][name] = read_value
355
- to_return[name] = ConfigValue(read_value)
358
+ flow_cls._flow_state.self_data[FlowStateItems.CONFIGS][
359
+ name
360
+ ] = read_value
361
+ to_return[name] = (
362
+ ConfigValue(read_value) if read_value is not None else None
363
+ )
356
364
  else:
357
365
  if self._parsers[name]:
358
366
  read_value = self._call_parser(self._parsers[name], val)
@@ -366,8 +374,12 @@ class ConfigInput:
366
374
  )
367
375
  continue
368
376
  # TODO: Support YAML
369
- flow_cls._flow_state[_FlowState.CONFIGS][name] = read_value
370
- to_return[name] = ConfigValue(read_value)
377
+ flow_cls._flow_state.self_data[FlowStateItems.CONFIGS][
378
+ name
379
+ ] = read_value
380
+ to_return[name] = (
381
+ ConfigValue(read_value) if read_value is not None else None
382
+ )
371
383
 
372
384
  reqs = missing_configs.intersection(self._req_configs)
373
385
  for missing in reqs:
@@ -398,7 +410,7 @@ class ConfigInput:
398
410
  return self.process_configs(
399
411
  ctx.obj.flow.name,
400
412
  param.name,
401
- value,
413
+ dict(value),
402
414
  ctx.params["quiet"],
403
415
  ctx.params["datastore"],
404
416
  click_obj=ctx.obj,
@@ -433,7 +445,7 @@ class LocalFileInput(click.Path):
433
445
  # Small wrapper around click.Path to set the value from which to read configuration
434
446
  # values. This is set immediately upon processing the --local-config-file
435
447
  # option and will therefore then be available when processing any of the other
436
- # --config options (which will call ConfigInput.process_configs
448
+ # --config options (which will call ConfigInput.process_configs)
437
449
  name = "LocalFileInput"
438
450
 
439
451
  def convert(self, value, param, ctx):