ob-metaflow 2.15.13.1__py2.py3-none-any.whl → 2.19.7.1rc0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaflow/__init__.py +10 -3
- metaflow/_vendor/imghdr/__init__.py +186 -0
- metaflow/_vendor/yaml/__init__.py +427 -0
- metaflow/_vendor/yaml/composer.py +139 -0
- metaflow/_vendor/yaml/constructor.py +748 -0
- metaflow/_vendor/yaml/cyaml.py +101 -0
- metaflow/_vendor/yaml/dumper.py +62 -0
- metaflow/_vendor/yaml/emitter.py +1137 -0
- metaflow/_vendor/yaml/error.py +75 -0
- metaflow/_vendor/yaml/events.py +86 -0
- metaflow/_vendor/yaml/loader.py +63 -0
- metaflow/_vendor/yaml/nodes.py +49 -0
- metaflow/_vendor/yaml/parser.py +589 -0
- metaflow/_vendor/yaml/reader.py +185 -0
- metaflow/_vendor/yaml/representer.py +389 -0
- metaflow/_vendor/yaml/resolver.py +227 -0
- metaflow/_vendor/yaml/scanner.py +1435 -0
- metaflow/_vendor/yaml/serializer.py +111 -0
- metaflow/_vendor/yaml/tokens.py +104 -0
- metaflow/cards.py +4 -0
- metaflow/cli.py +125 -21
- metaflow/cli_components/init_cmd.py +1 -0
- metaflow/cli_components/run_cmds.py +204 -40
- metaflow/cli_components/step_cmd.py +160 -4
- metaflow/client/__init__.py +1 -0
- metaflow/client/core.py +198 -130
- metaflow/client/filecache.py +59 -32
- metaflow/cmd/code/__init__.py +2 -1
- metaflow/cmd/develop/stub_generator.py +49 -18
- metaflow/cmd/develop/stubs.py +9 -27
- metaflow/cmd/make_wrapper.py +30 -0
- metaflow/datastore/__init__.py +1 -0
- metaflow/datastore/content_addressed_store.py +40 -9
- metaflow/datastore/datastore_set.py +10 -1
- metaflow/datastore/flow_datastore.py +124 -4
- metaflow/datastore/spin_datastore.py +91 -0
- metaflow/datastore/task_datastore.py +92 -6
- metaflow/debug.py +5 -0
- metaflow/decorators.py +331 -82
- metaflow/extension_support/__init__.py +414 -356
- metaflow/extension_support/_empty_file.py +2 -2
- metaflow/flowspec.py +322 -82
- metaflow/graph.py +178 -15
- metaflow/includefile.py +25 -3
- metaflow/lint.py +94 -3
- metaflow/meta_files.py +13 -0
- metaflow/metadata_provider/metadata.py +13 -2
- metaflow/metaflow_config.py +66 -4
- metaflow/metaflow_environment.py +91 -25
- metaflow/metaflow_profile.py +18 -0
- metaflow/metaflow_version.py +16 -1
- metaflow/package/__init__.py +673 -0
- metaflow/packaging_sys/__init__.py +880 -0
- metaflow/packaging_sys/backend.py +128 -0
- metaflow/packaging_sys/distribution_support.py +153 -0
- metaflow/packaging_sys/tar_backend.py +99 -0
- metaflow/packaging_sys/utils.py +54 -0
- metaflow/packaging_sys/v1.py +527 -0
- metaflow/parameters.py +6 -2
- metaflow/plugins/__init__.py +6 -0
- metaflow/plugins/airflow/airflow.py +11 -1
- metaflow/plugins/airflow/airflow_cli.py +16 -5
- metaflow/plugins/argo/argo_client.py +42 -20
- metaflow/plugins/argo/argo_events.py +6 -6
- metaflow/plugins/argo/argo_workflows.py +1023 -344
- metaflow/plugins/argo/argo_workflows_cli.py +396 -94
- metaflow/plugins/argo/argo_workflows_decorator.py +9 -0
- metaflow/plugins/argo/argo_workflows_deployer_objects.py +75 -49
- metaflow/plugins/argo/capture_error.py +5 -2
- metaflow/plugins/argo/conditional_input_paths.py +35 -0
- metaflow/plugins/argo/exit_hooks.py +209 -0
- metaflow/plugins/argo/param_val.py +19 -0
- metaflow/plugins/aws/aws_client.py +6 -0
- metaflow/plugins/aws/aws_utils.py +33 -1
- metaflow/plugins/aws/batch/batch.py +72 -5
- metaflow/plugins/aws/batch/batch_cli.py +24 -3
- metaflow/plugins/aws/batch/batch_decorator.py +57 -6
- metaflow/plugins/aws/step_functions/step_functions.py +28 -3
- metaflow/plugins/aws/step_functions/step_functions_cli.py +49 -4
- metaflow/plugins/aws/step_functions/step_functions_deployer.py +3 -0
- metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +30 -0
- metaflow/plugins/cards/card_cli.py +20 -1
- metaflow/plugins/cards/card_creator.py +24 -1
- metaflow/plugins/cards/card_datastore.py +21 -49
- metaflow/plugins/cards/card_decorator.py +58 -6
- metaflow/plugins/cards/card_modules/basic.py +38 -9
- metaflow/plugins/cards/card_modules/bundle.css +1 -1
- metaflow/plugins/cards/card_modules/chevron/renderer.py +1 -1
- metaflow/plugins/cards/card_modules/components.py +592 -3
- metaflow/plugins/cards/card_modules/convert_to_native_type.py +34 -5
- metaflow/plugins/cards/card_modules/json_viewer.py +232 -0
- metaflow/plugins/cards/card_modules/main.css +1 -0
- metaflow/plugins/cards/card_modules/main.js +56 -41
- metaflow/plugins/cards/card_modules/test_cards.py +22 -6
- metaflow/plugins/cards/component_serializer.py +1 -8
- metaflow/plugins/cards/metadata.py +22 -0
- metaflow/plugins/catch_decorator.py +9 -0
- metaflow/plugins/datastores/local_storage.py +12 -6
- metaflow/plugins/datastores/spin_storage.py +12 -0
- metaflow/plugins/datatools/s3/s3.py +49 -17
- metaflow/plugins/datatools/s3/s3op.py +113 -66
- metaflow/plugins/env_escape/client_modules.py +102 -72
- metaflow/plugins/events_decorator.py +127 -121
- metaflow/plugins/exit_hook/__init__.py +0 -0
- metaflow/plugins/exit_hook/exit_hook_decorator.py +46 -0
- metaflow/plugins/exit_hook/exit_hook_script.py +52 -0
- metaflow/plugins/kubernetes/kubernetes.py +12 -1
- metaflow/plugins/kubernetes/kubernetes_cli.py +11 -0
- metaflow/plugins/kubernetes/kubernetes_decorator.py +25 -6
- metaflow/plugins/kubernetes/kubernetes_job.py +12 -4
- metaflow/plugins/kubernetes/kubernetes_jobsets.py +31 -30
- metaflow/plugins/metadata_providers/local.py +76 -82
- metaflow/plugins/metadata_providers/service.py +13 -9
- metaflow/plugins/metadata_providers/spin.py +16 -0
- metaflow/plugins/package_cli.py +36 -24
- metaflow/plugins/parallel_decorator.py +11 -2
- metaflow/plugins/parsers.py +16 -0
- metaflow/plugins/pypi/bootstrap.py +7 -1
- metaflow/plugins/pypi/conda_decorator.py +41 -82
- metaflow/plugins/pypi/conda_environment.py +14 -6
- metaflow/plugins/pypi/micromamba.py +9 -1
- metaflow/plugins/pypi/pip.py +41 -5
- metaflow/plugins/pypi/pypi_decorator.py +4 -4
- metaflow/plugins/pypi/utils.py +22 -0
- metaflow/plugins/secrets/__init__.py +3 -0
- metaflow/plugins/secrets/secrets_decorator.py +14 -178
- metaflow/plugins/secrets/secrets_func.py +49 -0
- metaflow/plugins/secrets/secrets_spec.py +101 -0
- metaflow/plugins/secrets/utils.py +74 -0
- metaflow/plugins/test_unbounded_foreach_decorator.py +2 -2
- metaflow/plugins/timeout_decorator.py +0 -1
- metaflow/plugins/uv/bootstrap.py +29 -1
- metaflow/plugins/uv/uv_environment.py +5 -3
- metaflow/pylint_wrapper.py +5 -1
- metaflow/runner/click_api.py +79 -26
- metaflow/runner/deployer.py +208 -6
- metaflow/runner/deployer_impl.py +32 -12
- metaflow/runner/metaflow_runner.py +266 -33
- metaflow/runner/subprocess_manager.py +21 -1
- metaflow/runner/utils.py +27 -16
- metaflow/runtime.py +660 -66
- metaflow/task.py +255 -26
- metaflow/user_configs/config_options.py +33 -21
- metaflow/user_configs/config_parameters.py +220 -58
- metaflow/user_decorators/__init__.py +0 -0
- metaflow/user_decorators/common.py +144 -0
- metaflow/user_decorators/mutable_flow.py +512 -0
- metaflow/user_decorators/mutable_step.py +424 -0
- metaflow/user_decorators/user_flow_decorator.py +264 -0
- metaflow/user_decorators/user_step_decorator.py +749 -0
- metaflow/util.py +197 -7
- metaflow/vendor.py +23 -7
- metaflow/version.py +1 -1
- {ob_metaflow-2.15.13.1.data → ob_metaflow-2.19.7.1rc0.data}/data/share/metaflow/devtools/Makefile +13 -2
- {ob_metaflow-2.15.13.1.data → ob_metaflow-2.19.7.1rc0.data}/data/share/metaflow/devtools/Tiltfile +107 -7
- {ob_metaflow-2.15.13.1.data → ob_metaflow-2.19.7.1rc0.data}/data/share/metaflow/devtools/pick_services.sh +1 -0
- {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/METADATA +2 -3
- {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/RECORD +162 -121
- {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/WHEEL +1 -1
- metaflow/_vendor/v3_5/__init__.py +0 -1
- metaflow/_vendor/v3_5/importlib_metadata/__init__.py +0 -644
- metaflow/_vendor/v3_5/importlib_metadata/_compat.py +0 -152
- metaflow/_vendor/v3_5/zipp.py +0 -329
- metaflow/info_file.py +0 -25
- metaflow/package.py +0 -203
- metaflow/user_configs/config_decorators.py +0 -568
- {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/entry_points.txt +0 -0
- {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/licenses/LICENSE +0 -0
- {ob_metaflow-2.15.13.1.dist-info → ob_metaflow-2.19.7.1rc0.dist-info}/top_level.txt +0 -0
metaflow/task.py
CHANGED
|
@@ -11,8 +11,10 @@ from types import MethodType, FunctionType
|
|
|
11
11
|
from metaflow.sidecar import Message, MessageTypes
|
|
12
12
|
from metaflow.datastore.exceptions import DataException
|
|
13
13
|
|
|
14
|
+
from metaflow.plugins import METADATA_PROVIDERS
|
|
14
15
|
from .metaflow_config import MAX_ATTEMPTS
|
|
15
16
|
from .metadata_provider import MetaDatum
|
|
17
|
+
from .metaflow_profile import from_start
|
|
16
18
|
from .mflog import TASK_LOG_SOURCE
|
|
17
19
|
from .datastore import Inputs, TaskDataStoreSet
|
|
18
20
|
from .exception import (
|
|
@@ -24,6 +26,7 @@ from .unbounded_foreach import UBF_CONTROL
|
|
|
24
26
|
from .util import all_equal, get_username, resolve_identity, unicode_type
|
|
25
27
|
from .clone_util import clone_task_helper
|
|
26
28
|
from .metaflow_current import current
|
|
29
|
+
from metaflow.user_configs.config_parameters import ConfigValue
|
|
27
30
|
from metaflow.system import _system_logger, _system_monitor
|
|
28
31
|
from metaflow.tracing import get_trace_id
|
|
29
32
|
from metaflow.tuple_util import ForeachFrame
|
|
@@ -47,6 +50,8 @@ class MetaflowTask(object):
|
|
|
47
50
|
event_logger,
|
|
48
51
|
monitor,
|
|
49
52
|
ubf_context,
|
|
53
|
+
orig_flow_datastore=None,
|
|
54
|
+
spin_artifacts=None,
|
|
50
55
|
):
|
|
51
56
|
self.flow = flow
|
|
52
57
|
self.flow_datastore = flow_datastore
|
|
@@ -56,12 +61,126 @@ class MetaflowTask(object):
|
|
|
56
61
|
self.event_logger = event_logger
|
|
57
62
|
self.monitor = monitor
|
|
58
63
|
self.ubf_context = ubf_context
|
|
64
|
+
self.orig_flow_datastore = orig_flow_datastore
|
|
65
|
+
self.spin_artifacts = spin_artifacts
|
|
66
|
+
|
|
67
|
+
def _exec_step_function(self, step_function, orig_step_func, input_obj=None):
    """Run a step's body surrounded by the pre/post hooks of its wrappers.

    Parameters
    ----------
    step_function : callable
        The (possibly decorated) step callable. Called with no argument, or
        with `input_obj` when `input_obj` is not None.
    orig_step_func : object
        The undecorated step. This method reads its `wrappers` and `name`
        attributes.
    input_obj : object, optional
        Passed to the step function and to every wrapper's `pre_step`.

    Raises
    ------
    Exception
        Whatever exception is still pending after every `post_step` ran, or
        the third element of a tuple (of length 3 or more) returned by a
        `post_step`.
    RuntimeError
        If a `post_step` returns an unsupported value, if the value used to
        fake the `self.next` call is neither a bool nor a dictionary, or if a
        parallel foreach step has to be transitioned without explicit
        arguments.
    """
    wrappers_stack = []
    wrapped_func = None

    # Will be set to a non-falsy value if we need to fake calling `self.next`
    # (this is used when skipping the step). If a dictionary, it contains the
    # arguments to pass to `self.next`. If True, it means we use whatever the
    # usual arguments to `self.next` are for this step.
    fake_next_call_args = False
    raised_exception = None
    had_raised_exception = False

    # If we have wrappers w1, w2 and w3, we need to execute
    #  - w3_pre
    #  - w2_pre
    #  - w1_pre
    #  - step_function
    #  - w1_post
    #  - w2_post
    #  - w3_post
    # in that order. We do this by maintaining a stack of the wrappers whose
    # pre_step ran. Note that if any of the pre functions returns a function,
    # we execute that instead of the rest of the inside part. This is useful
    # if you want to create a no-op function for example.
    for w in reversed(orig_step_func.wrappers):
        wrapped_func = w.pre_step(orig_step_func.name, self.flow, input_obj)
        wrappers_stack.append(w)
        if w.skip_step:
            # We are not going to run anything so we will have to fake
            # calling next.
            fake_next_call_args = w.skip_step
            break
        if wrapped_func:
            # Nothing left to do since we now execute the wrapped function.
            break
        # Else, we continue down the list of wrappers.

    try:
        # fake_next_call_args also indicates that the step was skipped, in
        # which case we do not execute anything.
        if not fake_next_call_args:
            call_args = () if input_obj is None else (input_obj,)
            if wrapped_func:
                fake_next_call_args = wrapped_func(self.flow, *call_args)
            else:
                step_function(*call_args)
    except Exception as ex:
        raised_exception = ex
        had_raised_exception = True

    # We back out of the stack of wrappers.
    for w in reversed(wrappers_stack):
        try:
            r = w.post_step(orig_step_func.name, self.flow, raised_exception)
        except Exception as ex:
            r = ex
        if r is None:
            # The wrapper handled (or there was no) exception.
            raised_exception = None
        elif isinstance(r, Exception):
            raised_exception = r
        elif isinstance(r, tuple) and len(r) == 2:
            raised_exception, fake_next_call_args = r
        elif isinstance(r, tuple) and len(r) > 2:
            # The last argument is an exception to be re-raised. Used in
            # user_step_decorator's post_step
            raise r[2]
        else:
            # `r` is wrapped in a one-element tuple so that a tuple value is
            # formatted as a whole instead of being unpacked by `%`.
            raise RuntimeError(
                "Invalid return value from a UserStepDecorator. Expected an "
                "exception or an exception and arguments for self.next, "
                "got: %s" % (r,)
            )
    if raised_exception:
        # We have an exception that we need to propagate
        raise raised_exception

    if fake_next_call_args or had_raised_exception:
        # We want to override the next call or we caught an exception (in
        # which case the regular step code didn't call self.next). In this
        # case, we need to set the transition variables properly. We call the
        # next function as needed. We also do this in case we want to gobble
        # the exception.
        graph_node = self.flow._graph[orig_step_func.name]
        out_funcs = [getattr(self.flow, f) for f in graph_node.out_funcs]
        if out_funcs:
            self.flow._transition = None
            if isinstance(fake_next_call_args, dict) and fake_next_call_args:
                # Not an empty dictionary -- we use this as arguments for the
                # next call
                self.flow.next(*out_funcs, **fake_next_call_args)
            elif (
                # Equality (not identity) is kept on purpose so that values
                # comparing equal to True keep their existing meaning.
                fake_next_call_args == True  # noqa: E712
                or fake_next_call_args == {}
                or had_raised_exception
            ):
                # We need to extract things from the self.next. This is not
                # possible in the case where there was a num_parallel.
                if graph_node.parallel_foreach:
                    raise RuntimeError(
                        "Skipping a parallel foreach step without providing "
                        "the arguments to the self.next call is not supported. "
                    )
                if graph_node.foreach_param:
                    self.flow.next(*out_funcs, foreach=graph_node.foreach_param)
                else:
                    self.flow.next(*out_funcs)
            else:
                raise RuntimeError(
                    "Invalid value passed to self.next; expected "
                    "bool or a dictionary; got: %s" % (fake_next_call_args,)
                )
|
|
65
184
|
|
|
66
185
|
def _init_parameters(self, parameter_ds, passdown=True):
|
|
67
186
|
cls = self.flow.__class__
|
|
@@ -120,7 +239,6 @@ class MetaflowTask(object):
|
|
|
120
239
|
lambda _, parameter_ds=parameter_ds: parameter_ds["_graph_info"],
|
|
121
240
|
)
|
|
122
241
|
all_vars.append("_graph_info")
|
|
123
|
-
|
|
124
242
|
if passdown:
|
|
125
243
|
self.flow._datastore.passdown_partial(parameter_ds, all_vars)
|
|
126
244
|
return param_only_vars
|
|
@@ -136,6 +254,7 @@ class MetaflowTask(object):
|
|
|
136
254
|
# Prefetch 'foreach' related artifacts to improve time taken by
|
|
137
255
|
# _init_foreach.
|
|
138
256
|
prefetch_data_artifacts = [
|
|
257
|
+
"_iteration_stack",
|
|
139
258
|
"_foreach_stack",
|
|
140
259
|
"_foreach_num_splits",
|
|
141
260
|
"_foreach_var",
|
|
@@ -147,6 +266,9 @@ class MetaflowTask(object):
|
|
|
147
266
|
run_id,
|
|
148
267
|
pathspecs=input_paths,
|
|
149
268
|
prefetch_data_artifacts=prefetch_data_artifacts,
|
|
269
|
+
join_type=join_type,
|
|
270
|
+
orig_flow_datastore=self.orig_flow_datastore,
|
|
271
|
+
spin_artifacts=self.spin_artifacts,
|
|
150
272
|
)
|
|
151
273
|
ds_list = [ds for ds in datastore_set]
|
|
152
274
|
if len(ds_list) != len(input_paths):
|
|
@@ -158,10 +280,27 @@ class MetaflowTask(object):
|
|
|
158
280
|
# initialize directly in the single input case.
|
|
159
281
|
ds_list = []
|
|
160
282
|
for input_path in input_paths:
|
|
161
|
-
|
|
283
|
+
parts = input_path.split("/")
|
|
284
|
+
if len(parts) == 3:
|
|
285
|
+
run_id, step_name, task_id = parts
|
|
286
|
+
attempt = None
|
|
287
|
+
else:
|
|
288
|
+
run_id, step_name, task_id, attempt = parts
|
|
289
|
+
attempt = int(attempt)
|
|
290
|
+
|
|
162
291
|
ds_list.append(
|
|
163
|
-
self.flow_datastore.get_task_datastore(
|
|
292
|
+
self.flow_datastore.get_task_datastore(
|
|
293
|
+
run_id,
|
|
294
|
+
step_name,
|
|
295
|
+
task_id,
|
|
296
|
+
attempt=attempt,
|
|
297
|
+
join_type=join_type,
|
|
298
|
+
orig_flow_datastore=self.orig_flow_datastore,
|
|
299
|
+
spin_artifacts=self.spin_artifacts,
|
|
300
|
+
)
|
|
164
301
|
)
|
|
302
|
+
from_start("MetaflowTask: got datastore for input path %s" % input_path)
|
|
303
|
+
|
|
165
304
|
if not ds_list:
|
|
166
305
|
# this guards against errors in input paths
|
|
167
306
|
raise MetaflowDataMissing(
|
|
@@ -272,6 +411,56 @@ class MetaflowTask(object):
|
|
|
272
411
|
elif "_foreach_stack" in inputs[0]:
|
|
273
412
|
self.flow._foreach_stack = inputs[0]["_foreach_stack"]
|
|
274
413
|
|
|
414
|
+
def _init_iteration(self, step_name, inputs, is_recursive_step):
    """Set `self.flow._iteration_stack` for the step about to run.

    We track the iteration "stack" for loops. At this time, we only support
    one type of "looping" which is a recursive step but this can generalize
    to arbitrary well-scoped loops in the future.

    `_iteration_stack` contains the iteration count for each loop level.
    Currently, it holds either no element (no loop) or a single element (a
    single recursive step).

    Parameters
    ----------
    step_name : str
        Name of the step; only used in the error message.
    inputs : sequence
        Input datastores; each must support `in` and item access for
        "_iteration_stack".
    is_recursive_step : bool
        Whether the step is a recursive step.

    Raises
    ------
    MetaflowInternalError
        If the step is recursive and `inputs` does not hold exactly one
        element.
    """
    # The rules to add, increment or pop a looping level are straightforward
    # while only recursive steps are supported:
    #  1) if is_recursive_step, we are entering a loop, either for the first
    #     time or to continue it. A recursive step CANNOT be a join step so
    #     there is always a single input.
    #     1a) If inputs[0]["_iteration_stack"] contains an element, we are
    #         looping so we increment the count.
    #     1b) If inputs[0]["_iteration_stack"] is empty (or absent), this is
    #         the first time we enter the loop so we set the count to 0.
    #  2) if it is not a recursive step, it may be the step *after* the
    #     recursive step. Since non recursive steps are *never* part of an
    #     iteration, we just set the stack to [] without checking anything.
    #     We will have to revisit this if/when more complex loop structures
    #     are supported.
    #
    # Note that just like _foreach_stack, we need to set _iteration_stack to
    # *something* so that it doesn't get clobbered weirdly by merge_artifacts.
    if not is_recursive_step:
        # Case 2)
        self.flow._iteration_stack = []
        return

    # Case 1)
    if len(inputs) != 1:
        raise MetaflowInternalError(
            "Step *%s* is a recursive step but got multiple inputs." % step_name
        )
    inp = inputs[0]
    previous = inp["_iteration_stack"] if "_iteration_stack" in inp else None
    if not previous:
        # Case 1b)
        self.flow._iteration_stack = [0]
    else:
        # Case 1a). Work on a copy so that the list object handed out by the
        # input datastore is never modified in place.
        stack = list(previous)
        stack[-1] += 1
        self.flow._iteration_stack = stack
|
|
463
|
+
|
|
275
464
|
def _clone_flow(self, datastore):
|
|
276
465
|
x = self.flow.__class__(use_cli=False)
|
|
277
466
|
x._set_datastore(datastore)
|
|
@@ -382,6 +571,8 @@ class MetaflowTask(object):
|
|
|
382
571
|
split_index,
|
|
383
572
|
retry_count,
|
|
384
573
|
max_user_code_retries,
|
|
574
|
+
whitelist_decorators=None,
|
|
575
|
+
persist=True,
|
|
385
576
|
):
|
|
386
577
|
if run_id and task_id:
|
|
387
578
|
self.metadata.register_run_id(run_id)
|
|
@@ -440,7 +631,14 @@ class MetaflowTask(object):
|
|
|
440
631
|
|
|
441
632
|
step_func = getattr(self.flow, step_name)
|
|
442
633
|
decorators = step_func.decorators
|
|
443
|
-
|
|
634
|
+
if self.orig_flow_datastore:
|
|
635
|
+
# We filter only the whitelisted decorators in case of spin step.
|
|
636
|
+
decorators = (
|
|
637
|
+
[]
|
|
638
|
+
if not whitelist_decorators
|
|
639
|
+
else [deco for deco in decorators if deco.name in whitelist_decorators]
|
|
640
|
+
)
|
|
641
|
+
from_start("MetaflowTask: decorators initialized")
|
|
444
642
|
node = self.flow._graph[step_name]
|
|
445
643
|
join_type = None
|
|
446
644
|
if node.type == "join":
|
|
@@ -448,17 +646,26 @@ class MetaflowTask(object):
|
|
|
448
646
|
|
|
449
647
|
# 1. initialize output datastore
|
|
450
648
|
output = self.flow_datastore.get_task_datastore(
|
|
451
|
-
run_id, step_name, task_id, attempt=retry_count, mode="w"
|
|
649
|
+
run_id, step_name, task_id, attempt=retry_count, mode="w", persist=persist
|
|
452
650
|
)
|
|
453
651
|
|
|
454
652
|
output.init_task()
|
|
653
|
+
from_start("MetaflowTask: output datastore initialized")
|
|
455
654
|
|
|
456
655
|
if input_paths:
|
|
457
656
|
# 2. initialize input datastores
|
|
458
657
|
inputs = self._init_data(run_id, join_type, input_paths)
|
|
658
|
+
from_start("MetaflowTask: input datastores initialized")
|
|
459
659
|
|
|
460
660
|
# 3. initialize foreach state
|
|
461
661
|
self._init_foreach(step_name, join_type, inputs, split_index)
|
|
662
|
+
from_start("MetaflowTask: foreach state initialized")
|
|
663
|
+
|
|
664
|
+
# 4. initialize the iteration state
|
|
665
|
+
is_recursive_step = (
|
|
666
|
+
node.type == "split-switch" and step_name in node.out_funcs
|
|
667
|
+
)
|
|
668
|
+
self._init_iteration(step_name, inputs, is_recursive_step)
|
|
462
669
|
|
|
463
670
|
# Add foreach stack to metadata of the task
|
|
464
671
|
|
|
@@ -511,7 +718,7 @@ class MetaflowTask(object):
|
|
|
511
718
|
),
|
|
512
719
|
]
|
|
513
720
|
)
|
|
514
|
-
|
|
721
|
+
from_start("MetaflowTask: finished input processing")
|
|
515
722
|
self.metadata.register_metadata(
|
|
516
723
|
run_id,
|
|
517
724
|
step_name,
|
|
@@ -538,6 +745,9 @@ class MetaflowTask(object):
|
|
|
538
745
|
output.save_metadata(
|
|
539
746
|
{
|
|
540
747
|
"task_begin": {
|
|
748
|
+
"code_package_metadata": os.environ.get(
|
|
749
|
+
"METAFLOW_CODE_METADATA", ""
|
|
750
|
+
),
|
|
541
751
|
"code_package_sha": os.environ.get("METAFLOW_CODE_SHA"),
|
|
542
752
|
"code_package_ds": os.environ.get("METAFLOW_CODE_DS"),
|
|
543
753
|
"code_package_url": os.environ.get("METAFLOW_CODE_URL"),
|
|
@@ -562,8 +772,11 @@ class MetaflowTask(object):
|
|
|
562
772
|
"project_flow_name": current.get("project_flow_name"),
|
|
563
773
|
"trace_id": trace_id or None,
|
|
564
774
|
}
|
|
775
|
+
|
|
776
|
+
from_start("MetaflowTask: task metadata initialized")
|
|
565
777
|
start = time.time()
|
|
566
778
|
self.metadata.start_task_heartbeat(self.flow.name, run_id, step_name, task_id)
|
|
779
|
+
from_start("MetaflowTask: heartbeat started")
|
|
567
780
|
with self.monitor.measure("metaflow.task.duration"):
|
|
568
781
|
try:
|
|
569
782
|
with self.monitor.count("metaflow.task.start"):
|
|
@@ -583,18 +796,23 @@ class MetaflowTask(object):
|
|
|
583
796
|
# should either be set prior to running the user code or listed in
|
|
584
797
|
# FlowSpec._EPHEMERAL to allow for proper merging/importing of
|
|
585
798
|
# user artifacts in the user's step code.
|
|
586
|
-
|
|
587
799
|
if join_type:
|
|
588
800
|
# Join step:
|
|
589
801
|
|
|
590
|
-
# Ensure that we have the right number of inputs.
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
)
|
|
802
|
+
# Ensure that we have the right number of inputs.
|
|
803
|
+
if join_type != "foreach":
|
|
804
|
+
# Find the corresponding split node from the graph.
|
|
805
|
+
split_node = self.flow._graph[node.split_parents[-1]]
|
|
806
|
+
# The number of expected inputs is the number of branches
|
|
807
|
+
# from that split -- we can't use in_funcs because there may
|
|
808
|
+
# be more due to split-switch branches that all converge here.
|
|
809
|
+
expected_inputs = len(split_node.out_funcs)
|
|
810
|
+
|
|
811
|
+
if len(inputs) != expected_inputs:
|
|
812
|
+
raise MetaflowDataMissing(
|
|
813
|
+
"Join *%s* expected %d inputs but only %d inputs "
|
|
814
|
+
"were found" % (step_name, expected_inputs, len(inputs))
|
|
815
|
+
)
|
|
598
816
|
|
|
599
817
|
# Multiple input contexts are passed in as an argument
|
|
600
818
|
# to the step function.
|
|
@@ -636,11 +854,19 @@ class MetaflowTask(object):
|
|
|
636
854
|
"graph_info": self.flow._graph_info,
|
|
637
855
|
}
|
|
638
856
|
)
|
|
857
|
+
from_start("MetaflowTask: before pre-step decorators")
|
|
639
858
|
for deco in decorators:
|
|
859
|
+
if deco.name == "card" and self.orig_flow_datastore:
|
|
860
|
+
# if spin step and card decorator, pass spin metadata
|
|
861
|
+
metadata = [m for m in METADATA_PROVIDERS if m.TYPE == "spin"][
|
|
862
|
+
0
|
|
863
|
+
](self.environment, self.flow, self.event_logger, self.monitor)
|
|
864
|
+
else:
|
|
865
|
+
metadata = self.metadata
|
|
640
866
|
deco.task_pre_step(
|
|
641
867
|
step_name,
|
|
642
868
|
output,
|
|
643
|
-
|
|
869
|
+
metadata,
|
|
644
870
|
run_id,
|
|
645
871
|
task_id,
|
|
646
872
|
self.flow,
|
|
@@ -651,6 +877,7 @@ class MetaflowTask(object):
|
|
|
651
877
|
inputs,
|
|
652
878
|
)
|
|
653
879
|
|
|
880
|
+
orig_step_func = step_func
|
|
654
881
|
for deco in decorators:
|
|
655
882
|
# decorators can actually decorate the step function,
|
|
656
883
|
# or they can replace it altogether. This functionality
|
|
@@ -665,12 +892,12 @@ class MetaflowTask(object):
|
|
|
665
892
|
max_user_code_retries,
|
|
666
893
|
self.ubf_context,
|
|
667
894
|
)
|
|
668
|
-
|
|
895
|
+
from_start("MetaflowTask: finished decorator processing")
|
|
669
896
|
if join_type:
|
|
670
|
-
self._exec_step_function(step_func, input_obj)
|
|
897
|
+
self._exec_step_function(step_func, orig_step_func, input_obj)
|
|
671
898
|
else:
|
|
672
|
-
self._exec_step_function(step_func)
|
|
673
|
-
|
|
899
|
+
self._exec_step_function(step_func, orig_step_func)
|
|
900
|
+
from_start("MetaflowTask: step function executed")
|
|
674
901
|
for deco in decorators:
|
|
675
902
|
deco.task_post_step(
|
|
676
903
|
step_name,
|
|
@@ -713,6 +940,7 @@ class MetaflowTask(object):
|
|
|
713
940
|
raise
|
|
714
941
|
|
|
715
942
|
finally:
|
|
943
|
+
from_start("MetaflowTask: decorators finalized")
|
|
716
944
|
if self.ubf_context == UBF_CONTROL:
|
|
717
945
|
self._finalize_control_task()
|
|
718
946
|
|
|
@@ -752,7 +980,7 @@ class MetaflowTask(object):
|
|
|
752
980
|
)
|
|
753
981
|
|
|
754
982
|
output.save_metadata({"task_end": {}})
|
|
755
|
-
|
|
983
|
+
from_start("MetaflowTask: output persisted")
|
|
756
984
|
# this writes a success marker indicating that the
|
|
757
985
|
# "transaction" is done
|
|
758
986
|
output.done()
|
|
@@ -781,3 +1009,4 @@ class MetaflowTask(object):
|
|
|
781
1009
|
name="duration",
|
|
782
1010
|
payload={**task_payload, "msg": str(duration)},
|
|
783
1011
|
)
|
|
1012
|
+
from_start("MetaflowTask: task run completed")
|
|
@@ -7,8 +7,9 @@ from typing import Any, Callable, Dict, List, Optional, Tuple, Union
|
|
|
7
7
|
from metaflow._vendor import click
|
|
8
8
|
from metaflow.debug import debug
|
|
9
9
|
|
|
10
|
-
from .config_parameters import
|
|
10
|
+
from .config_parameters import ConfigValue
|
|
11
11
|
from ..exception import MetaflowException, MetaflowInternalError
|
|
12
|
+
from ..packaging_sys import MetaflowCodeContent
|
|
12
13
|
from ..parameters import DeployTimeField, ParameterContext, current_flow
|
|
13
14
|
from ..util import get_username
|
|
14
15
|
|
|
@@ -24,12 +25,16 @@ _CONVERTED_DEFAULT_NO_FILE = _CONVERTED_DEFAULT + _NO_FILE
|
|
|
24
25
|
|
|
25
26
|
def _load_config_values(info_file: Optional[str] = None) -> Optional[Dict[Any, Any]]:
    """Return the "user_configs" mapping from a JSON file or the code content.

    Parameters
    ----------
    info_file : str, optional
        Path of a UTF-8 JSON file to read. When None, the content is obtained
        from `MetaflowCodeContent.get_config()` instead (presumably the
        configuration shipped with the code package -- confirm against
        `packaging_sys`).

    Returns
    -------
    Optional[Dict[Any, Any]]
        The value stored under "user_configs" (an empty dictionary if the key
        is absent), or None when the file cannot be opened or the loaded
        content is empty.
    """
    if info_file is not None:
        try:
            with open(info_file, encoding="utf-8") as handle:
                loaded = json.load(handle)
        except IOError:
            # An unreadable or missing file means no configuration values.
            return None
    else:
        loaded = MetaflowCodeContent.get_config()
    return loaded.get("user_configs", {}) if loaded else None
|
|
33
38
|
|
|
34
39
|
|
|
35
40
|
class ConvertPath(click.Path):
|
|
@@ -181,7 +186,7 @@ class ConfigInput:
|
|
|
181
186
|
click_obj: Optional[Any] = None,
|
|
182
187
|
):
|
|
183
188
|
from ..cli import echo_always, echo_dev_null # Prevent circular import
|
|
184
|
-
from ..flowspec import
|
|
189
|
+
from ..flowspec import FlowStateItems # Prevent circular import
|
|
185
190
|
|
|
186
191
|
flow_cls = getattr(current_flow, "flow_cls", None)
|
|
187
192
|
if flow_cls is None:
|
|
@@ -221,13 +226,13 @@ class ConfigInput:
|
|
|
221
226
|
if param_name == "config_value":
|
|
222
227
|
self._value_values = {
|
|
223
228
|
k.lower(): v
|
|
224
|
-
for k, v in param_value
|
|
229
|
+
for k, v in param_value.items()
|
|
225
230
|
if v is not None and not v.startswith(_CONVERTED_DEFAULT)
|
|
226
231
|
}
|
|
227
232
|
else:
|
|
228
233
|
self._path_values = {
|
|
229
234
|
k.lower(): v
|
|
230
|
-
for k, v in param_value
|
|
235
|
+
for k, v in param_value.items()
|
|
231
236
|
if v is not None and not v.startswith(_CONVERTED_DEFAULT)
|
|
232
237
|
}
|
|
233
238
|
if do_return:
|
|
@@ -255,7 +260,6 @@ class ConfigInput:
|
|
|
255
260
|
for k in all_keys
|
|
256
261
|
)
|
|
257
262
|
|
|
258
|
-
flow_cls._flow_state[_FlowState.CONFIGS] = {}
|
|
259
263
|
to_return = {}
|
|
260
264
|
|
|
261
265
|
if not has_all_kv:
|
|
@@ -327,14 +331,14 @@ class ConfigInput:
|
|
|
327
331
|
if val is None:
|
|
328
332
|
missing_configs.add(name)
|
|
329
333
|
to_return[name] = None
|
|
330
|
-
flow_cls._flow_state[
|
|
331
|
-
continue
|
|
332
|
-
if val.startswith(_CONVERTED_DEFAULT_NO_FILE):
|
|
333
|
-
no_default_file.append(name)
|
|
334
|
+
flow_cls._flow_state.self_data[FlowStateItems.CONFIGS][name] = None
|
|
334
335
|
continue
|
|
335
336
|
if val.startswith(_CONVERTED_NO_FILE):
|
|
336
337
|
no_file.append(name)
|
|
337
338
|
continue
|
|
339
|
+
if val.startswith(_CONVERTED_DEFAULT_NO_FILE):
|
|
340
|
+
no_default_file.append(name)
|
|
341
|
+
continue
|
|
338
342
|
|
|
339
343
|
val = val[len(_CONVERT_PREFIX) :] # Remove the _CONVERT_PREFIX
|
|
340
344
|
if val.startswith(_DEFAULT_PREFIX): # Remove the _DEFAULT_PREFIX if needed
|
|
@@ -351,8 +355,12 @@ class ConfigInput:
|
|
|
351
355
|
click_obj.delayed_config_exception = exc
|
|
352
356
|
return None
|
|
353
357
|
raise exc from e
|
|
354
|
-
flow_cls._flow_state[
|
|
355
|
-
|
|
358
|
+
flow_cls._flow_state.self_data[FlowStateItems.CONFIGS][
|
|
359
|
+
name
|
|
360
|
+
] = read_value
|
|
361
|
+
to_return[name] = (
|
|
362
|
+
ConfigValue(read_value) if read_value is not None else None
|
|
363
|
+
)
|
|
356
364
|
else:
|
|
357
365
|
if self._parsers[name]:
|
|
358
366
|
read_value = self._call_parser(self._parsers[name], val)
|
|
@@ -366,8 +374,12 @@ class ConfigInput:
|
|
|
366
374
|
)
|
|
367
375
|
continue
|
|
368
376
|
# TODO: Support YAML
|
|
369
|
-
flow_cls._flow_state[
|
|
370
|
-
|
|
377
|
+
flow_cls._flow_state.self_data[FlowStateItems.CONFIGS][
|
|
378
|
+
name
|
|
379
|
+
] = read_value
|
|
380
|
+
to_return[name] = (
|
|
381
|
+
ConfigValue(read_value) if read_value is not None else None
|
|
382
|
+
)
|
|
371
383
|
|
|
372
384
|
reqs = missing_configs.intersection(self._req_configs)
|
|
373
385
|
for missing in reqs:
|
|
@@ -398,7 +410,7 @@ class ConfigInput:
|
|
|
398
410
|
return self.process_configs(
|
|
399
411
|
ctx.obj.flow.name,
|
|
400
412
|
param.name,
|
|
401
|
-
value,
|
|
413
|
+
dict(value),
|
|
402
414
|
ctx.params["quiet"],
|
|
403
415
|
ctx.params["datastore"],
|
|
404
416
|
click_obj=ctx.obj,
|
|
@@ -433,7 +445,7 @@ class LocalFileInput(click.Path):
|
|
|
433
445
|
# Small wrapper around click.Path to set the value from which to read configuration
|
|
434
446
|
# values. This is set immediately upon processing the --local-config-file
|
|
435
447
|
# option and will therefore then be available when processing any of the other
|
|
436
|
-
# --config options (which will call ConfigInput.process_configs
|
|
448
|
+
# --config options (which will call ConfigInput.process_configs)
|
|
437
449
|
name = "LocalFileInput"
|
|
438
450
|
|
|
439
451
|
def convert(self, value, param, ctx):
|