metaflow 2.12.39__py2.py3-none-any.whl → 2.13.1__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaflow/__init__.py +1 -1
- metaflow/cli.py +111 -36
- metaflow/cli_args.py +2 -2
- metaflow/cli_components/run_cmds.py +3 -1
- metaflow/datastore/flow_datastore.py +2 -2
- metaflow/exception.py +8 -2
- metaflow/flowspec.py +48 -36
- metaflow/graph.py +28 -27
- metaflow/includefile.py +2 -2
- metaflow/lint.py +35 -20
- metaflow/metadata_provider/heartbeat.py +23 -8
- metaflow/metaflow_config.py +7 -0
- metaflow/parameters.py +11 -4
- metaflow/plugins/argo/argo_client.py +0 -2
- metaflow/plugins/argo/argo_workflows.py +86 -104
- metaflow/plugins/argo/argo_workflows_cli.py +0 -1
- metaflow/plugins/argo/argo_workflows_decorator.py +2 -4
- metaflow/plugins/argo/argo_workflows_deployer_objects.py +42 -0
- metaflow/plugins/argo/jobset_input_paths.py +0 -1
- metaflow/plugins/aws/aws_utils.py +6 -1
- metaflow/plugins/aws/batch/batch_client.py +1 -3
- metaflow/plugins/aws/batch/batch_decorator.py +11 -11
- metaflow/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.py +13 -10
- metaflow/plugins/aws/step_functions/dynamo_db_client.py +0 -3
- metaflow/plugins/aws/step_functions/production_token.py +1 -1
- metaflow/plugins/aws/step_functions/step_functions.py +1 -1
- metaflow/plugins/aws/step_functions/step_functions_cli.py +0 -1
- metaflow/plugins/aws/step_functions/step_functions_decorator.py +0 -1
- metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +0 -1
- metaflow/plugins/cards/card_creator.py +1 -0
- metaflow/plugins/cards/card_decorator.py +46 -8
- metaflow/plugins/kubernetes/kube_utils.py +55 -1
- metaflow/plugins/kubernetes/kubernetes.py +33 -80
- metaflow/plugins/kubernetes/kubernetes_cli.py +22 -5
- metaflow/plugins/kubernetes/kubernetes_decorator.py +49 -2
- metaflow/plugins/kubernetes/kubernetes_job.py +3 -6
- metaflow/plugins/kubernetes/kubernetes_jobsets.py +22 -5
- metaflow/plugins/pypi/bootstrap.py +249 -81
- metaflow/plugins/pypi/conda_environment.py +83 -27
- metaflow/plugins/pypi/micromamba.py +77 -36
- metaflow/plugins/pypi/pip.py +9 -6
- metaflow/plugins/pypi/utils.py +4 -2
- metaflow/runner/click_api.py +175 -39
- metaflow/runner/deployer_impl.py +6 -1
- metaflow/runner/metaflow_runner.py +6 -1
- metaflow/runner/utils.py +5 -0
- metaflow/user_configs/config_options.py +87 -34
- metaflow/user_configs/config_parameters.py +44 -25
- metaflow/util.py +2 -2
- metaflow/version.py +1 -1
- {metaflow-2.12.39.dist-info → metaflow-2.13.1.dist-info}/METADATA +2 -2
- {metaflow-2.12.39.dist-info → metaflow-2.13.1.dist-info}/RECORD +56 -56
- {metaflow-2.12.39.dist-info → metaflow-2.13.1.dist-info}/WHEEL +1 -1
- {metaflow-2.12.39.dist-info → metaflow-2.13.1.dist-info}/LICENSE +0 -0
- {metaflow-2.12.39.dist-info → metaflow-2.13.1.dist-info}/entry_points.txt +0 -0
- {metaflow-2.12.39.dist-info → metaflow-2.13.1.dist-info}/top_level.txt +0 -0
metaflow/lint.py
CHANGED
@@ -52,7 +52,7 @@ def check_reserved_words(graph):
|
|
52
52
|
msg = "Step name *%s* is a reserved word. Choose another name for the " "step."
|
53
53
|
for node in graph:
|
54
54
|
if node.name in RESERVED:
|
55
|
-
raise LintWarn(msg % node.name)
|
55
|
+
raise LintWarn(msg % node.name, node.func_lineno, node.source_file)
|
56
56
|
|
57
57
|
|
58
58
|
@linter.ensure_fundamentals
|
@@ -76,9 +76,9 @@ def check_that_end_is_end(graph):
|
|
76
76
|
node = graph["end"]
|
77
77
|
|
78
78
|
if node.has_tail_next or node.invalid_tail_next:
|
79
|
-
raise LintWarn(msg0, node.tail_next_lineno)
|
79
|
+
raise LintWarn(msg0, node.tail_next_lineno, node.source_file)
|
80
80
|
if node.num_args > 1:
|
81
|
-
raise LintWarn(msg1, node.tail_next_lineno)
|
81
|
+
raise LintWarn(msg1, node.tail_next_lineno, node.source_file)
|
82
82
|
|
83
83
|
|
84
84
|
@linter.ensure_fundamentals
|
@@ -90,7 +90,7 @@ def check_step_names(graph):
|
|
90
90
|
)
|
91
91
|
for node in graph:
|
92
92
|
if re.search("[^a-z0-9_]", node.name) or node.name[0] == "_":
|
93
|
-
raise LintWarn(msg.format(node), node.func_lineno)
|
93
|
+
raise LintWarn(msg.format(node), node.func_lineno, node.source_file)
|
94
94
|
|
95
95
|
|
96
96
|
@linter.ensure_fundamentals
|
@@ -108,11 +108,11 @@ def check_num_args(graph):
|
|
108
108
|
msg2 = "Step *{0.name}* is missing the 'self' argument."
|
109
109
|
for node in graph:
|
110
110
|
if node.num_args > 2:
|
111
|
-
raise LintWarn(msg0.format(node), node.func_lineno)
|
111
|
+
raise LintWarn(msg0.format(node), node.func_lineno, node.source_file)
|
112
112
|
elif node.num_args == 2 and node.type != "join":
|
113
|
-
raise LintWarn(msg1.format(node), node.func_lineno)
|
113
|
+
raise LintWarn(msg1.format(node), node.func_lineno, node.source_file)
|
114
114
|
elif node.num_args == 0:
|
115
|
-
raise LintWarn(msg2.format(node), node.func_lineno)
|
115
|
+
raise LintWarn(msg2.format(node), node.func_lineno, node.source_file)
|
116
116
|
|
117
117
|
|
118
118
|
@linter.ensure_static_graph
|
@@ -125,7 +125,7 @@ def check_static_transitions(graph):
|
|
125
125
|
)
|
126
126
|
for node in graph:
|
127
127
|
if node.type != "end" and not node.has_tail_next:
|
128
|
-
raise LintWarn(msg.format(node), node.func_lineno)
|
128
|
+
raise LintWarn(msg.format(node), node.func_lineno, node.source_file)
|
129
129
|
|
130
130
|
|
131
131
|
@linter.ensure_static_graph
|
@@ -138,7 +138,7 @@ def check_valid_transitions(graph):
|
|
138
138
|
)
|
139
139
|
for node in graph:
|
140
140
|
if node.type != "end" and node.has_tail_next and node.invalid_tail_next:
|
141
|
-
raise LintWarn(msg.format(node), node.tail_next_lineno)
|
141
|
+
raise LintWarn(msg.format(node), node.tail_next_lineno, node.source_file)
|
142
142
|
|
143
143
|
|
144
144
|
@linter.ensure_static_graph
|
@@ -151,7 +151,11 @@ def check_unknown_transitions(graph):
|
|
151
151
|
for node in graph:
|
152
152
|
unknown = [n for n in node.out_funcs if n not in graph]
|
153
153
|
if unknown:
|
154
|
-
raise LintWarn(
|
154
|
+
raise LintWarn(
|
155
|
+
msg.format(node, step=unknown[0]),
|
156
|
+
node.tail_next_lineno,
|
157
|
+
node.source_file,
|
158
|
+
)
|
155
159
|
|
156
160
|
|
157
161
|
@linter.ensure_acyclicity
|
@@ -167,7 +171,9 @@ def check_for_acyclicity(graph):
|
|
167
171
|
for n in node.out_funcs:
|
168
172
|
if n in seen:
|
169
173
|
path = "->".join(seen + [n])
|
170
|
-
raise LintWarn(
|
174
|
+
raise LintWarn(
|
175
|
+
msg.format(path), node.tail_next_lineno, node.source_file
|
176
|
+
)
|
171
177
|
else:
|
172
178
|
check_path(graph[n], seen + [n])
|
173
179
|
|
@@ -195,7 +201,7 @@ def check_for_orphans(graph):
|
|
195
201
|
orphans = nodeset - seen
|
196
202
|
if orphans:
|
197
203
|
orphan = graph[list(orphans)[0]]
|
198
|
-
raise LintWarn(msg.format(orphan), orphan.func_lineno)
|
204
|
+
raise LintWarn(msg.format(orphan), orphan.func_lineno, orphan.source_file)
|
199
205
|
|
200
206
|
|
201
207
|
@linter.ensure_static_graph
|
@@ -230,7 +236,9 @@ def check_split_join_balance(graph):
|
|
230
236
|
if split_stack:
|
231
237
|
_, split_roots = split_stack.pop()
|
232
238
|
roots = ", ".join(split_roots)
|
233
|
-
raise LintWarn(
|
239
|
+
raise LintWarn(
|
240
|
+
msg0.format(roots=roots), node.func_lineno, node.source_file
|
241
|
+
)
|
234
242
|
elif node.type == "join":
|
235
243
|
if split_stack:
|
236
244
|
_, split_roots = split_stack[-1]
|
@@ -243,9 +251,10 @@ def check_split_join_balance(graph):
|
|
243
251
|
node, paths=paths, num_roots=len(split_roots), roots=roots
|
244
252
|
),
|
245
253
|
node.func_lineno,
|
254
|
+
node.source_file,
|
246
255
|
)
|
247
256
|
else:
|
248
|
-
raise LintWarn(msg2.format(node), node.func_lineno)
|
257
|
+
raise LintWarn(msg2.format(node), node.func_lineno, node.source_file)
|
249
258
|
|
250
259
|
# check that incoming steps come from the same lineage
|
251
260
|
# (no cross joins)
|
@@ -256,7 +265,7 @@ def check_split_join_balance(graph):
|
|
256
265
|
return tuple(graph[n].split_parents)
|
257
266
|
|
258
267
|
if not all_equal(map(parents, node.in_funcs)):
|
259
|
-
raise LintWarn(msg3.format(node), node.func_lineno)
|
268
|
+
raise LintWarn(msg3.format(node), node.func_lineno, node.source_file)
|
260
269
|
|
261
270
|
for n in node.out_funcs:
|
262
271
|
traverse(graph[n], new_stack)
|
@@ -276,7 +285,9 @@ def check_empty_foreaches(graph):
|
|
276
285
|
if node.type == "foreach":
|
277
286
|
joins = [n for n in node.out_funcs if graph[n].type == "join"]
|
278
287
|
if joins:
|
279
|
-
raise LintWarn(
|
288
|
+
raise LintWarn(
|
289
|
+
msg.format(node, join=joins[0]), node.func_lineno, node.source_file
|
290
|
+
)
|
280
291
|
|
281
292
|
|
282
293
|
@linter.ensure_static_graph
|
@@ -290,7 +301,7 @@ def check_parallel_step_after_next(graph):
|
|
290
301
|
if node.parallel_foreach and not all(
|
291
302
|
graph[out_node].parallel_step for out_node in node.out_funcs
|
292
303
|
):
|
293
|
-
raise LintWarn(msg.format(node))
|
304
|
+
raise LintWarn(msg.format(node), node.func_lineno, node.source_file)
|
294
305
|
|
295
306
|
|
296
307
|
@linter.ensure_static_graph
|
@@ -303,7 +314,9 @@ def check_join_followed_by_parallel_step(graph):
|
|
303
314
|
)
|
304
315
|
for node in graph:
|
305
316
|
if node.parallel_step and not graph[node.out_funcs[0]].type == "join":
|
306
|
-
raise LintWarn(
|
317
|
+
raise LintWarn(
|
318
|
+
msg.format(node.out_funcs[0]), node.func_lineno, node.source_file
|
319
|
+
)
|
307
320
|
|
308
321
|
|
309
322
|
@linter.ensure_static_graph
|
@@ -318,7 +331,9 @@ def check_parallel_foreach_calls_parallel_step(graph):
|
|
318
331
|
for node2 in graph:
|
319
332
|
if node2.out_funcs and node.name in node2.out_funcs:
|
320
333
|
if not node2.parallel_foreach:
|
321
|
-
raise LintWarn(
|
334
|
+
raise LintWarn(
|
335
|
+
msg.format(node, node2), node.func_lineno, node.source_file
|
336
|
+
)
|
322
337
|
|
323
338
|
|
324
339
|
@linter.ensure_non_nested_foreach
|
@@ -331,4 +346,4 @@ def check_nested_foreach(graph):
|
|
331
346
|
for node in graph:
|
332
347
|
if node.type == "foreach":
|
333
348
|
if any(graph[p].type == "foreach" for p in node.split_parents):
|
334
|
-
raise LintWarn(msg.format(node))
|
349
|
+
raise LintWarn(msg.format(node), node.func_lineno, node.source_file)
|
@@ -1,11 +1,12 @@
|
|
1
|
+
import json
|
1
2
|
import time
|
3
|
+
from threading import Thread
|
4
|
+
|
2
5
|
import requests
|
3
|
-
import json
|
4
6
|
|
5
|
-
from threading import Thread
|
6
|
-
from metaflow.sidecar import MessageTypes, Message
|
7
|
-
from metaflow.metaflow_config import SERVICE_HEADERS
|
8
7
|
from metaflow.exception import MetaflowException
|
8
|
+
from metaflow.metaflow_config import SERVICE_HEADERS
|
9
|
+
from metaflow.sidecar import Message, MessageTypes
|
9
10
|
|
10
11
|
HB_URL_KEY = "hb_url"
|
11
12
|
|
@@ -52,13 +53,27 @@ class MetadataHeartBeat(object):
|
|
52
53
|
retry_counter = 0
|
53
54
|
except HeartBeatException as e:
|
54
55
|
retry_counter = retry_counter + 1
|
55
|
-
time.sleep(
|
56
|
+
time.sleep(1.5**retry_counter)
|
56
57
|
|
57
58
|
def _heartbeat(self):
|
58
59
|
if self.hb_url is not None:
|
59
|
-
|
60
|
-
|
61
|
-
|
60
|
+
try:
|
61
|
+
response = requests.post(
|
62
|
+
url=self.hb_url, data="{}", headers=self.headers.copy()
|
63
|
+
)
|
64
|
+
except requests.exceptions.ConnectionError as e:
|
65
|
+
raise HeartBeatException(
|
66
|
+
"HeartBeat request (%s) failed" " (ConnectionError)" % (self.hb_url)
|
67
|
+
)
|
68
|
+
except requests.exceptions.Timeout as e:
|
69
|
+
raise HeartBeatException(
|
70
|
+
"HeartBeat request (%s) failed" " (Timeout)" % (self.hb_url)
|
71
|
+
)
|
72
|
+
except requests.exceptions.RequestException as e:
|
73
|
+
raise HeartBeatException(
|
74
|
+
"HeartBeat request (%s) failed"
|
75
|
+
" (RequestException) %s" % (self.hb_url, str(e))
|
76
|
+
)
|
62
77
|
# Unfortunately, response.json() returns a string that we need
|
63
78
|
# to cast to json; however when the request encounters an error
|
64
79
|
# the return type is a json blob :/
|
metaflow/metaflow_config.py
CHANGED
@@ -355,6 +355,8 @@ KUBERNETES_PERSISTENT_VOLUME_CLAIMS = from_conf(
|
|
355
355
|
KUBERNETES_SECRETS = from_conf("KUBERNETES_SECRETS", "")
|
356
356
|
# Default labels for kubernetes pods
|
357
357
|
KUBERNETES_LABELS = from_conf("KUBERNETES_LABELS", "")
|
358
|
+
# Default annotations for kubernetes pods
|
359
|
+
KUBERNETES_ANNOTATIONS = from_conf("KUBERNETES_ANNOTATIONS", "")
|
358
360
|
# Default GPU vendor to use by K8S jobs created by Metaflow (supports nvidia, amd)
|
359
361
|
KUBERNETES_GPU_VENDOR = from_conf("KUBERNETES_GPU_VENDOR", "nvidia")
|
360
362
|
# Default container image for K8S
|
@@ -508,6 +510,11 @@ DISABLE_TRACING = bool(os.environ.get("DISABLE_TRACING", False))
|
|
508
510
|
# lexicographic ordering of attempts. This won't work if MAX_ATTEMPTS > 99.
|
509
511
|
MAX_ATTEMPTS = 6
|
510
512
|
|
513
|
+
# Feature flag (experimental features that are *explicitly* unsupported)
|
514
|
+
|
515
|
+
# Process configs even when using the click_api for Runner/Deployer
|
516
|
+
CLICK_API_PROCESS_CONFIG = from_conf("CLICK_API_PROCESS_CONFIG", False)
|
517
|
+
|
511
518
|
|
512
519
|
# PINNED_CONDA_LIBS are the libraries that metaflow depends on for execution
|
513
520
|
# and are needed within a conda environment
|
metaflow/parameters.py
CHANGED
@@ -359,7 +359,7 @@ class Parameter(object):
|
|
359
359
|
"show_default": show_default,
|
360
360
|
}
|
361
361
|
|
362
|
-
def init(self):
|
362
|
+
def init(self, ignore_errors=False):
|
363
363
|
# Prevent circular import
|
364
364
|
from .user_configs.config_parameters import (
|
365
365
|
resolve_delayed_evaluator,
|
@@ -367,14 +367,21 @@ class Parameter(object):
|
|
367
367
|
)
|
368
368
|
|
369
369
|
# Resolve any value from configurations
|
370
|
-
self.kwargs = unpack_delayed_evaluator(self.kwargs)
|
371
|
-
|
370
|
+
self.kwargs = unpack_delayed_evaluator(self.kwargs, ignore_errors=ignore_errors)
|
371
|
+
# Do it one item at a time so errors are ignored at that level (as opposed to
|
372
|
+
# at the entire kwargs leve)
|
373
|
+
self.kwargs = {
|
374
|
+
k: resolve_delayed_evaluator(v, ignore_errors=ignore_errors)
|
375
|
+
for k, v in self.kwargs.items()
|
376
|
+
}
|
372
377
|
|
373
378
|
# This was the behavior before configs: values specified in args would override
|
374
379
|
# stuff in kwargs which is what we implement here as well
|
375
380
|
for key, value in self._override_kwargs.items():
|
376
381
|
if value is not None:
|
377
|
-
self.kwargs[key] =
|
382
|
+
self.kwargs[key] = resolve_delayed_evaluator(
|
383
|
+
value, ignore_errors=ignore_errors
|
384
|
+
)
|
378
385
|
# Set two default values if no-one specified them
|
379
386
|
self.kwargs.setdefault("required", False)
|
380
387
|
self.kwargs.setdefault("show_default", True)
|
@@ -7,12 +7,11 @@ import sys
|
|
7
7
|
from collections import defaultdict
|
8
8
|
from hashlib import sha1
|
9
9
|
from math import inf
|
10
|
-
from typing import List, Tuple
|
11
10
|
|
12
11
|
from metaflow import JSONType, current
|
13
12
|
from metaflow.decorators import flow_decorators
|
14
13
|
from metaflow.exception import MetaflowException
|
15
|
-
from metaflow.graph import
|
14
|
+
from metaflow.graph import FlowGraph
|
16
15
|
from metaflow.includefile import FilePathClass
|
17
16
|
from metaflow.metaflow_config import (
|
18
17
|
ARGO_EVENTS_EVENT,
|
@@ -39,9 +38,7 @@ from metaflow.metaflow_config import (
|
|
39
38
|
DEFAULT_SECRETS_BACKEND_TYPE,
|
40
39
|
GCP_SECRET_MANAGER_PREFIX,
|
41
40
|
KUBERNETES_FETCH_EC2_METADATA,
|
42
|
-
KUBERNETES_LABELS,
|
43
41
|
KUBERNETES_NAMESPACE,
|
44
|
-
KUBERNETES_NODE_SELECTOR,
|
45
42
|
KUBERNETES_SANDBOX_INIT_SCRIPT,
|
46
43
|
KUBERNETES_SECRETS,
|
47
44
|
S3_ENDPOINT_URL,
|
@@ -54,10 +51,7 @@ from metaflow.metaflow_config_funcs import config_values
|
|
54
51
|
from metaflow.mflog import BASH_SAVE_LOGS, bash_capture_logs, export_mflog_env_vars
|
55
52
|
from metaflow.parameters import deploy_time_eval
|
56
53
|
from metaflow.plugins.kubernetes.kube_utils import qos_requests_and_limits
|
57
|
-
|
58
|
-
parse_kube_keyvalue_list,
|
59
|
-
validate_kube_labels,
|
60
|
-
)
|
54
|
+
|
61
55
|
from metaflow.plugins.kubernetes.kubernetes_jobsets import KubernetesArgoJobSet
|
62
56
|
from metaflow.unbounded_foreach import UBF_CONTROL, UBF_TASK
|
63
57
|
from metaflow.user_configs.config_options import ConfigInput
|
@@ -173,7 +167,8 @@ class ArgoWorkflows(object):
|
|
173
167
|
self.triggers, self.trigger_options = self._process_triggers()
|
174
168
|
self._schedule, self._timezone = self._get_schedule()
|
175
169
|
|
176
|
-
self.
|
170
|
+
self._base_labels = self._base_kubernetes_labels()
|
171
|
+
self._base_annotations = self._base_kubernetes_annotations()
|
177
172
|
self._workflow_template = self._compile_workflow_template()
|
178
173
|
self._sensor = self._compile_sensor()
|
179
174
|
|
@@ -310,7 +305,7 @@ class ArgoWorkflows(object):
|
|
310
305
|
try:
|
311
306
|
# Check that the workflow was deployed through Metaflow
|
312
307
|
workflow_template["metadata"]["annotations"]["metaflow/owner"]
|
313
|
-
except KeyError
|
308
|
+
except KeyError:
|
314
309
|
raise ArgoWorkflowsException(
|
315
310
|
"An existing non-metaflow workflow with the same name as "
|
316
311
|
"*%s* already exists in Argo Workflows. \nPlease modify the "
|
@@ -324,18 +319,42 @@ class ArgoWorkflows(object):
|
|
324
319
|
except Exception as e:
|
325
320
|
raise ArgoWorkflowsException(str(e))
|
326
321
|
|
327
|
-
|
328
|
-
def _get_kubernetes_labels():
|
322
|
+
def _base_kubernetes_labels(self):
|
329
323
|
"""
|
330
|
-
Get Kubernetes labels
|
331
|
-
Parses the string into a dict and validates that values adhere to Kubernetes restrictions.
|
324
|
+
Get shared Kubernetes labels for Argo resources.
|
332
325
|
"""
|
333
|
-
if
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
326
|
+
# TODO: Add configuration through an environment variable or Metaflow config in the future if required.
|
327
|
+
labels = {"app.kubernetes.io/part-of": "metaflow"}
|
328
|
+
|
329
|
+
return labels
|
330
|
+
|
331
|
+
def _base_kubernetes_annotations(self):
|
332
|
+
"""
|
333
|
+
Get shared Kubernetes annotations for Argo resources.
|
334
|
+
"""
|
335
|
+
from datetime import datetime, timezone
|
336
|
+
|
337
|
+
# TODO: Add configuration through an environment variable or Metaflow config in the future if required.
|
338
|
+
# base annotations
|
339
|
+
annotations = {
|
340
|
+
"metaflow/production_token": self.production_token,
|
341
|
+
"metaflow/owner": self.username,
|
342
|
+
"metaflow/user": "argo-workflows",
|
343
|
+
"metaflow/flow_name": self.flow.name,
|
344
|
+
"metaflow/deployment_timestamp": str(
|
345
|
+
datetime.now(timezone.utc).isoformat()
|
346
|
+
),
|
347
|
+
}
|
348
|
+
|
349
|
+
if current.get("project_name"):
|
350
|
+
annotations.update(
|
351
|
+
{
|
352
|
+
"metaflow/project_name": current.project_name,
|
353
|
+
"metaflow/branch_name": current.branch_name,
|
354
|
+
"metaflow/project_flow_name": current.project_flow_name,
|
355
|
+
}
|
356
|
+
)
|
357
|
+
return annotations
|
339
358
|
|
340
359
|
def _get_schedule(self):
|
341
360
|
schedule = self.flow._flow_decorators.get("schedule")
|
@@ -411,7 +430,7 @@ class ArgoWorkflows(object):
|
|
411
430
|
"metaflow/production_token"
|
412
431
|
],
|
413
432
|
)
|
414
|
-
except KeyError
|
433
|
+
except KeyError:
|
415
434
|
raise ArgoWorkflowsException(
|
416
435
|
"An existing non-metaflow workflow with the same name as "
|
417
436
|
"*%s* already exists in Argo Workflows. \nPlease modify the "
|
@@ -676,18 +695,7 @@ class ArgoWorkflows(object):
|
|
676
695
|
# generate container templates at the top level (in WorkflowSpec) and maintain
|
677
696
|
# references to them within the DAGTask.
|
678
697
|
|
679
|
-
|
680
|
-
|
681
|
-
annotations = {
|
682
|
-
"metaflow/production_token": self.production_token,
|
683
|
-
"metaflow/owner": self.username,
|
684
|
-
"metaflow/user": "argo-workflows",
|
685
|
-
"metaflow/flow_name": self.flow.name,
|
686
|
-
"metaflow/deployment_timestamp": str(
|
687
|
-
datetime.now(timezone.utc).isoformat()
|
688
|
-
),
|
689
|
-
}
|
690
|
-
|
698
|
+
annotations = {}
|
691
699
|
if self._schedule is not None:
|
692
700
|
# timezone is an optional field and json dumps on None will result in null
|
693
701
|
# hence configuring it to an empty string
|
@@ -699,15 +707,6 @@ class ArgoWorkflows(object):
|
|
699
707
|
if self.parameters:
|
700
708
|
annotations.update({"metaflow/parameters": json.dumps(self.parameters)})
|
701
709
|
|
702
|
-
if current.get("project_name"):
|
703
|
-
annotations.update(
|
704
|
-
{
|
705
|
-
"metaflow/project_name": current.project_name,
|
706
|
-
"metaflow/branch_name": current.branch_name,
|
707
|
-
"metaflow/project_flow_name": current.project_flow_name,
|
708
|
-
}
|
709
|
-
)
|
710
|
-
|
711
710
|
# Some more annotations to populate the Argo UI nicely
|
712
711
|
if self.tags:
|
713
712
|
annotations.update({"metaflow/tags": json.dumps(self.tags)})
|
@@ -755,9 +754,10 @@ class ArgoWorkflows(object):
|
|
755
754
|
# is released, we should be able to support multi-namespace /
|
756
755
|
# multi-cluster scheduling.
|
757
756
|
.namespace(KUBERNETES_NAMESPACE)
|
758
|
-
.label("app.kubernetes.io/name", "metaflow-flow")
|
759
|
-
.label("app.kubernetes.io/part-of", "metaflow")
|
760
757
|
.annotations(annotations)
|
758
|
+
.annotations(self._base_annotations)
|
759
|
+
.labels(self._base_labels)
|
760
|
+
.label("app.kubernetes.io/name", "metaflow-flow")
|
761
761
|
)
|
762
762
|
.spec(
|
763
763
|
WorkflowSpec()
|
@@ -787,10 +787,14 @@ class ArgoWorkflows(object):
|
|
787
787
|
# Set workflow metadata
|
788
788
|
.workflow_metadata(
|
789
789
|
Metadata()
|
790
|
+
.labels(self._base_labels)
|
790
791
|
.label("app.kubernetes.io/name", "metaflow-run")
|
791
|
-
.label("app.kubernetes.io/part-of", "metaflow")
|
792
792
|
.annotations(
|
793
|
-
{
|
793
|
+
{
|
794
|
+
**annotations,
|
795
|
+
**self._base_annotations,
|
796
|
+
**{"metaflow/run_id": "argo-{{workflow.name}}"},
|
797
|
+
}
|
794
798
|
)
|
795
799
|
# TODO: Set dynamic labels using labels_from. Ideally, we would
|
796
800
|
# want to expose run_id as a label. It's easy to add labels,
|
@@ -823,10 +827,10 @@ class ArgoWorkflows(object):
|
|
823
827
|
# Set common pod metadata.
|
824
828
|
.pod_metadata(
|
825
829
|
Metadata()
|
830
|
+
.labels(self._base_labels)
|
826
831
|
.label("app.kubernetes.io/name", "metaflow-task")
|
827
|
-
.label("app.kubernetes.io/part-of", "metaflow")
|
828
832
|
.annotations(annotations)
|
829
|
-
.
|
833
|
+
.annotations(self._base_annotations)
|
830
834
|
)
|
831
835
|
# Set the entrypoint to flow name
|
832
836
|
.entrypoint(self.flow.name)
|
@@ -1895,15 +1899,7 @@ class ArgoWorkflows(object):
|
|
1895
1899
|
# twice, but due to issues with variable substitution, we will have to
|
1896
1900
|
# live with this routine.
|
1897
1901
|
if node.parallel_step:
|
1898
|
-
# Explicitly add the task-id-hint label. This is important because this label
|
1899
|
-
# is returned as an Output parameter of this step and is used subsequently as an
|
1900
|
-
# an input in the join step.
|
1901
|
-
kubernetes_labels = self.kubernetes_labels.copy()
|
1902
1902
|
jobset_name = "{{inputs.parameters.jobset-name}}"
|
1903
|
-
kubernetes_labels["task_id_entropy"] = (
|
1904
|
-
"{{inputs.parameters.task-id-entropy}}"
|
1905
|
-
)
|
1906
|
-
kubernetes_labels["num_parallel"] = "{{inputs.parameters.num-parallel}}"
|
1907
1903
|
jobset = KubernetesArgoJobSet(
|
1908
1904
|
kubernetes_sdk=kubernetes_sdk,
|
1909
1905
|
name=jobset_name,
|
@@ -1959,8 +1955,22 @@ class ArgoWorkflows(object):
|
|
1959
1955
|
for k, v in env.items():
|
1960
1956
|
jobset.environment_variable(k, v)
|
1961
1957
|
|
1962
|
-
|
1963
|
-
|
1958
|
+
# Set labels. Do not allow user-specified task labels to override internal ones.
|
1959
|
+
#
|
1960
|
+
# Explicitly add the task-id-hint label. This is important because this label
|
1961
|
+
# is returned as an Output parameter of this step and is used subsequently as an
|
1962
|
+
# an input in the join step.
|
1963
|
+
kubernetes_labels = {
|
1964
|
+
"task_id_entropy": "{{inputs.parameters.task-id-entropy}}",
|
1965
|
+
"num_parallel": "{{inputs.parameters.num-parallel}}",
|
1966
|
+
}
|
1967
|
+
jobset.labels(
|
1968
|
+
{
|
1969
|
+
**resources["labels"],
|
1970
|
+
**self._base_labels,
|
1971
|
+
**kubernetes_labels,
|
1972
|
+
}
|
1973
|
+
)
|
1964
1974
|
|
1965
1975
|
jobset.environment_variable(
|
1966
1976
|
"MF_MASTER_ADDR", jobset.jobset_control_addr
|
@@ -1989,27 +1999,23 @@ class ArgoWorkflows(object):
|
|
1989
1999
|
"TASK_ID_SUFFIX": "metadata.annotations['jobset.sigs.k8s.io/job-index']",
|
1990
2000
|
}
|
1991
2001
|
)
|
2002
|
+
|
2003
|
+
# Set annotations. Do not allow user-specified task-specific annotations to override internal ones.
|
1992
2004
|
annotations = {
|
1993
2005
|
# setting annotations explicitly as they wont be
|
1994
2006
|
# passed down from WorkflowTemplate level
|
1995
2007
|
"metaflow/step_name": node.name,
|
1996
2008
|
"metaflow/attempt": str(retry_count),
|
1997
2009
|
"metaflow/run_id": run_id,
|
1998
|
-
"metaflow/production_token": self.production_token,
|
1999
|
-
"metaflow/owner": self.username,
|
2000
|
-
"metaflow/user": "argo-workflows",
|
2001
|
-
"metaflow/flow_name": self.flow.name,
|
2002
2010
|
}
|
2003
|
-
|
2004
|
-
|
2005
|
-
|
2006
|
-
|
2007
|
-
|
2008
|
-
|
2009
|
-
|
2010
|
-
|
2011
|
-
for k, v in annotations.items():
|
2012
|
-
jobset.annotation(k, v)
|
2011
|
+
|
2012
|
+
jobset.annotations(
|
2013
|
+
{
|
2014
|
+
**resources["annotations"],
|
2015
|
+
**self._base_annotations,
|
2016
|
+
**annotations,
|
2017
|
+
}
|
2018
|
+
)
|
2013
2019
|
|
2014
2020
|
jobset.control.replicas(1)
|
2015
2021
|
jobset.worker.replicas("{{=asInt(inputs.parameters.workerCount)}}")
|
@@ -2066,13 +2072,16 @@ class ArgoWorkflows(object):
|
|
2066
2072
|
minutes_between_retries=minutes_between_retries,
|
2067
2073
|
)
|
2068
2074
|
.metadata(
|
2069
|
-
ObjectMeta()
|
2075
|
+
ObjectMeta()
|
2076
|
+
.annotation("metaflow/step_name", node.name)
|
2070
2077
|
# Unfortunately, we can't set the task_id since it is generated
|
2071
2078
|
# inside the pod. However, it can be inferred from the annotation
|
2072
2079
|
# set by argo-workflows - `workflows.argoproj.io/outputs` - refer
|
2073
2080
|
# the field 'task-id' in 'parameters'
|
2074
2081
|
# .annotation("metaflow/task_id", ...)
|
2075
2082
|
.annotation("metaflow/attempt", retry_count)
|
2083
|
+
.annotations(resources["annotations"])
|
2084
|
+
.labels(resources["labels"])
|
2076
2085
|
)
|
2077
2086
|
# Set emptyDir volume for state management
|
2078
2087
|
.empty_dir_volume("out")
|
@@ -2840,33 +2849,6 @@ class ArgoWorkflows(object):
|
|
2840
2849
|
"sdk (https://pypi.org/project/kubernetes/) first."
|
2841
2850
|
)
|
2842
2851
|
|
2843
|
-
labels = {"app.kubernetes.io/part-of": "metaflow"}
|
2844
|
-
|
2845
|
-
annotations = {
|
2846
|
-
"metaflow/production_token": self.production_token,
|
2847
|
-
"metaflow/owner": self.username,
|
2848
|
-
"metaflow/user": "argo-workflows",
|
2849
|
-
"metaflow/flow_name": self.flow.name,
|
2850
|
-
}
|
2851
|
-
if current.get("project_name"):
|
2852
|
-
annotations.update(
|
2853
|
-
{
|
2854
|
-
"metaflow/project_name": current.project_name,
|
2855
|
-
"metaflow/branch_name": current.branch_name,
|
2856
|
-
"metaflow/project_flow_name": current.project_flow_name,
|
2857
|
-
}
|
2858
|
-
)
|
2859
|
-
|
2860
|
-
# Useful to paint the UI
|
2861
|
-
trigger_annotations = {
|
2862
|
-
"metaflow/triggered_by": json.dumps(
|
2863
|
-
[
|
2864
|
-
{key: trigger.get(key) for key in ["name", "type"]}
|
2865
|
-
for trigger in self.triggers
|
2866
|
-
]
|
2867
|
-
)
|
2868
|
-
}
|
2869
|
-
|
2870
2852
|
return (
|
2871
2853
|
Sensor()
|
2872
2854
|
.metadata(
|
@@ -2874,10 +2856,9 @@ class ArgoWorkflows(object):
|
|
2874
2856
|
ObjectMeta()
|
2875
2857
|
.name(ArgoWorkflows._sensor_name(self.name))
|
2876
2858
|
.namespace(KUBERNETES_NAMESPACE)
|
2859
|
+
.labels(self._base_labels)
|
2877
2860
|
.label("app.kubernetes.io/name", "metaflow-sensor")
|
2878
|
-
.
|
2879
|
-
.labels(self.kubernetes_labels)
|
2880
|
-
.annotations(annotations)
|
2861
|
+
.annotations(self._base_annotations)
|
2881
2862
|
)
|
2882
2863
|
.spec(
|
2883
2864
|
SensorSpec().template(
|
@@ -2887,7 +2868,7 @@ class ArgoWorkflows(object):
|
|
2887
2868
|
ObjectMeta()
|
2888
2869
|
.label("app.kubernetes.io/name", "metaflow-sensor")
|
2889
2870
|
.label("app.kubernetes.io/part-of", "metaflow")
|
2890
|
-
.annotations(
|
2871
|
+
.annotations(self._base_annotations)
|
2891
2872
|
)
|
2892
2873
|
.container(
|
2893
2874
|
# Run sensor in guaranteed QoS. The sensor isn't doing a lot
|
@@ -2934,6 +2915,7 @@ class ArgoWorkflows(object):
|
|
2934
2915
|
"metadata": {
|
2935
2916
|
"generateName": "%s-" % self.name,
|
2936
2917
|
"namespace": KUBERNETES_NAMESPACE,
|
2918
|
+
# Useful to paint the UI
|
2937
2919
|
"annotations": {
|
2938
2920
|
"metaflow/triggered_by": json.dumps(
|
2939
2921
|
[
|
@@ -1,14 +1,12 @@
|
|
1
1
|
import json
|
2
2
|
import os
|
3
|
-
import time
|
4
3
|
|
5
4
|
|
6
5
|
from metaflow import current
|
7
6
|
from metaflow.decorators import StepDecorator
|
8
7
|
from metaflow.events import Trigger
|
9
8
|
from metaflow.metadata_provider import MetaDatum
|
10
|
-
from metaflow.
|
11
|
-
from metaflow.graph import DAGNode, FlowGraph
|
9
|
+
from metaflow.graph import FlowGraph
|
12
10
|
from metaflow.flowspec import FlowSpec
|
13
11
|
from .argo_events import ArgoEvent
|
14
12
|
|
@@ -42,7 +40,7 @@ class ArgoWorkflowsInternalDecorator(StepDecorator):
|
|
42
40
|
if payload != "null": # Argo-Workflow's None
|
43
41
|
try:
|
44
42
|
payload = json.loads(payload)
|
45
|
-
except (TypeError, ValueError)
|
43
|
+
except (TypeError, ValueError):
|
46
44
|
# There could be arbitrary events that Metaflow doesn't know of
|
47
45
|
payload = {}
|
48
46
|
triggers.append(
|