ob-metaflow 2.12.39.1__py2.py3-none-any.whl → 2.13.1.1__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- metaflow/__init__.py +1 -1
- metaflow/cli.py +111 -36
- metaflow/cli_args.py +2 -2
- metaflow/cli_components/run_cmds.py +3 -1
- metaflow/datastore/flow_datastore.py +2 -2
- metaflow/exception.py +8 -2
- metaflow/flowspec.py +48 -36
- metaflow/graph.py +28 -27
- metaflow/includefile.py +2 -2
- metaflow/lint.py +35 -20
- metaflow/metadata_provider/heartbeat.py +23 -8
- metaflow/metaflow_config.py +7 -0
- metaflow/parameters.py +11 -4
- metaflow/plugins/argo/argo_client.py +0 -2
- metaflow/plugins/argo/argo_workflows.py +86 -104
- metaflow/plugins/argo/argo_workflows_cli.py +0 -1
- metaflow/plugins/argo/argo_workflows_decorator.py +2 -4
- metaflow/plugins/argo/jobset_input_paths.py +0 -1
- metaflow/plugins/aws/aws_utils.py +6 -1
- metaflow/plugins/aws/batch/batch_client.py +1 -3
- metaflow/plugins/aws/batch/batch_decorator.py +11 -11
- metaflow/plugins/aws/secrets_manager/aws_secrets_manager_secrets_provider.py +13 -10
- metaflow/plugins/aws/step_functions/dynamo_db_client.py +0 -3
- metaflow/plugins/aws/step_functions/production_token.py +1 -1
- metaflow/plugins/aws/step_functions/step_functions.py +1 -1
- metaflow/plugins/aws/step_functions/step_functions_cli.py +0 -1
- metaflow/plugins/aws/step_functions/step_functions_decorator.py +0 -1
- metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +0 -1
- metaflow/plugins/cards/card_creator.py +1 -0
- metaflow/plugins/cards/card_decorator.py +46 -8
- metaflow/plugins/kubernetes/kube_utils.py +55 -1
- metaflow/plugins/kubernetes/kubernetes.py +33 -80
- metaflow/plugins/kubernetes/kubernetes_cli.py +22 -5
- metaflow/plugins/kubernetes/kubernetes_decorator.py +49 -2
- metaflow/plugins/kubernetes/kubernetes_job.py +3 -6
- metaflow/plugins/kubernetes/kubernetes_jobsets.py +22 -5
- metaflow/plugins/pypi/bootstrap.py +87 -54
- metaflow/plugins/pypi/conda_environment.py +7 -6
- metaflow/plugins/pypi/micromamba.py +35 -21
- metaflow/plugins/pypi/pip.py +2 -4
- metaflow/plugins/pypi/utils.py +4 -2
- metaflow/runner/click_api.py +175 -39
- metaflow/runner/deployer_impl.py +6 -1
- metaflow/runner/metaflow_runner.py +6 -1
- metaflow/user_configs/config_options.py +87 -34
- metaflow/user_configs/config_parameters.py +44 -25
- metaflow/util.py +2 -2
- metaflow/version.py +1 -1
- {ob_metaflow-2.12.39.1.dist-info → ob_metaflow-2.13.1.1.dist-info}/METADATA +2 -2
- {ob_metaflow-2.12.39.1.dist-info → ob_metaflow-2.13.1.1.dist-info}/RECORD +54 -54
- {ob_metaflow-2.12.39.1.dist-info → ob_metaflow-2.13.1.1.dist-info}/WHEEL +1 -1
- {ob_metaflow-2.12.39.1.dist-info → ob_metaflow-2.13.1.1.dist-info}/LICENSE +0 -0
- {ob_metaflow-2.12.39.1.dist-info → ob_metaflow-2.13.1.1.dist-info}/entry_points.txt +0 -0
- {ob_metaflow-2.12.39.1.dist-info → ob_metaflow-2.13.1.1.dist-info}/top_level.txt +0 -0
metaflow/lint.py
CHANGED

@@ -52,7 +52,7 @@ def check_reserved_words(graph):
     msg = "Step name *%s* is a reserved word. Choose another name for the " "step."
     for node in graph:
         if node.name in RESERVED:
-            raise LintWarn(msg % node.name)
+            raise LintWarn(msg % node.name, node.func_lineno, node.source_file)


 @linter.ensure_fundamentals
@@ -76,9 +76,9 @@ def check_that_end_is_end(graph):
     node = graph["end"]

     if node.has_tail_next or node.invalid_tail_next:
-        raise LintWarn(msg0, node.tail_next_lineno)
+        raise LintWarn(msg0, node.tail_next_lineno, node.source_file)
     if node.num_args > 1:
-        raise LintWarn(msg1, node.tail_next_lineno)
+        raise LintWarn(msg1, node.tail_next_lineno, node.source_file)


 @linter.ensure_fundamentals
@@ -90,7 +90,7 @@ def check_step_names(graph):
     )
     for node in graph:
         if re.search("[^a-z0-9_]", node.name) or node.name[0] == "_":
-            raise LintWarn(msg.format(node), node.func_lineno)
+            raise LintWarn(msg.format(node), node.func_lineno, node.source_file)


 @linter.ensure_fundamentals
@@ -108,11 +108,11 @@ def check_num_args(graph):
     msg2 = "Step *{0.name}* is missing the 'self' argument."
     for node in graph:
         if node.num_args > 2:
-            raise LintWarn(msg0.format(node), node.func_lineno)
+            raise LintWarn(msg0.format(node), node.func_lineno, node.source_file)
         elif node.num_args == 2 and node.type != "join":
-            raise LintWarn(msg1.format(node), node.func_lineno)
+            raise LintWarn(msg1.format(node), node.func_lineno, node.source_file)
         elif node.num_args == 0:
-            raise LintWarn(msg2.format(node), node.func_lineno)
+            raise LintWarn(msg2.format(node), node.func_lineno, node.source_file)


 @linter.ensure_static_graph
@@ -125,7 +125,7 @@ def check_static_transitions(graph):
     )
     for node in graph:
         if node.type != "end" and not node.has_tail_next:
-            raise LintWarn(msg.format(node), node.func_lineno)
+            raise LintWarn(msg.format(node), node.func_lineno, node.source_file)


 @linter.ensure_static_graph
@@ -138,7 +138,7 @@ def check_valid_transitions(graph):
     )
     for node in graph:
         if node.type != "end" and node.has_tail_next and node.invalid_tail_next:
-            raise LintWarn(msg.format(node), node.tail_next_lineno)
+            raise LintWarn(msg.format(node), node.tail_next_lineno, node.source_file)


 @linter.ensure_static_graph
@@ -151,7 +151,11 @@ def check_unknown_transitions(graph):
     for node in graph:
         unknown = [n for n in node.out_funcs if n not in graph]
         if unknown:
-            raise LintWarn(msg.format(node, step=unknown[0]), node.tail_next_lineno)
+            raise LintWarn(
+                msg.format(node, step=unknown[0]),
+                node.tail_next_lineno,
+                node.source_file,
+            )


 @linter.ensure_acyclicity
@@ -167,7 +171,9 @@ def check_for_acyclicity(graph):
         for n in node.out_funcs:
             if n in seen:
                 path = "->".join(seen + [n])
-                raise LintWarn(msg.format(path), node.tail_next_lineno)
+                raise LintWarn(
+                    msg.format(path), node.tail_next_lineno, node.source_file
+                )
             else:
                 check_path(graph[n], seen + [n])

@@ -195,7 +201,7 @@ def check_for_orphans(graph):
     orphans = nodeset - seen
     if orphans:
         orphan = graph[list(orphans)[0]]
-        raise LintWarn(msg.format(orphan), orphan.func_lineno)
+        raise LintWarn(msg.format(orphan), orphan.func_lineno, orphan.source_file)


 @linter.ensure_static_graph
@@ -230,7 +236,9 @@ def check_split_join_balance(graph):
             if split_stack:
                 _, split_roots = split_stack.pop()
                 roots = ", ".join(split_roots)
-                raise LintWarn(msg0.format(roots=roots), node.func_lineno)
+                raise LintWarn(
+                    msg0.format(roots=roots), node.func_lineno, node.source_file
+                )
         elif node.type == "join":
             if split_stack:
                 _, split_roots = split_stack[-1]
@@ -243,9 +251,10 @@ def check_split_join_balance(graph):
                             node, paths=paths, num_roots=len(split_roots), roots=roots
                         ),
                         node.func_lineno,
+                        node.source_file,
                     )
             else:
                 raise LintWarn(msg2.format(node), node.func_lineno, node.source_file)

         # check that incoming steps come from the same lineage
         # (no cross joins)
@@ -256,7 +265,7 @@ def check_split_join_balance(graph):
             return tuple(graph[n].split_parents)

         if not all_equal(map(parents, node.in_funcs)):
-            raise LintWarn(msg3.format(node), node.func_lineno)
+            raise LintWarn(msg3.format(node), node.func_lineno, node.source_file)

         for n in node.out_funcs:
             traverse(graph[n], new_stack)
@@ -276,7 +285,9 @@ def check_empty_foreaches(graph):
         if node.type == "foreach":
             joins = [n for n in node.out_funcs if graph[n].type == "join"]
             if joins:
-                raise LintWarn(msg.format(node, join=joins[0]), node.func_lineno)
+                raise LintWarn(
+                    msg.format(node, join=joins[0]), node.func_lineno, node.source_file
+                )


 @linter.ensure_static_graph
@@ -290,7 +301,7 @@ def check_parallel_step_after_next(graph):
         if node.parallel_foreach and not all(
             graph[out_node].parallel_step for out_node in node.out_funcs
         ):
-            raise LintWarn(msg.format(node))
+            raise LintWarn(msg.format(node), node.func_lineno, node.source_file)


 @linter.ensure_static_graph
@@ -303,7 +314,9 @@ def check_join_followed_by_parallel_step(graph):
     )
     for node in graph:
         if node.parallel_step and not graph[node.out_funcs[0]].type == "join":
-            raise LintWarn(msg.format(node.out_funcs[0]))
+            raise LintWarn(
+                msg.format(node.out_funcs[0]), node.func_lineno, node.source_file
+            )


 @linter.ensure_static_graph
@@ -318,7 +331,9 @@ def check_parallel_foreach_calls_parallel_step(graph):
             for node2 in graph:
                 if node2.out_funcs and node.name in node2.out_funcs:
                     if not node2.parallel_foreach:
-                        raise LintWarn(msg.format(node, node2))
+                        raise LintWarn(
+                            msg.format(node, node2), node.func_lineno, node.source_file
+                        )


 @linter.ensure_non_nested_foreach
@@ -331,4 +346,4 @@ def check_nested_foreach(graph):
     for node in graph:
         if node.type == "foreach":
             if any(graph[p].type == "foreach" for p in node.split_parents):
-                raise LintWarn(msg.format(node))
+                raise LintWarn(msg.format(node), node.func_lineno, node.source_file)
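The recurring change in this file threads the offending node's source file (and, where missing, a line number) into every LintWarn so lint failures can point at an exact location. Below is a minimal sketch of a warning type with that constructor shape; the signature is only inferred from the call sites above, and the real class in metaflow/exception.py may differ.

class LintWarn(Exception):
    """Sketch: a lint warning that remembers where the problem was found."""

    def __init__(self, msg, line_no=None, source_file=None):
        super().__init__(msg)
        self.message = msg
        self.line_no = line_no
        self.source_file = source_file

    def __str__(self):
        # Render "file:line: message" when location information is available.
        if self.source_file and self.line_no:
            return "%s:%s: %s" % (self.source_file, self.line_no, self.message)
        return self.message
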
metaflow/metadata_provider/heartbeat.py
CHANGED

@@ -1,11 +1,12 @@
+import json
 import time
+from threading import Thread
+
 import requests
-import json

-from threading import Thread
-from metaflow.sidecar import MessageTypes, Message
-from metaflow.metaflow_config import SERVICE_HEADERS
 from metaflow.exception import MetaflowException
+from metaflow.metaflow_config import SERVICE_HEADERS
+from metaflow.sidecar import Message, MessageTypes

 HB_URL_KEY = "hb_url"

@@ -52,13 +53,27 @@ class MetadataHeartBeat(object):
                 retry_counter = 0
             except HeartBeatException as e:
                 retry_counter = retry_counter + 1
-                time.sleep(
+                time.sleep(1.5**retry_counter)

     def _heartbeat(self):
         if self.hb_url is not None:
-            response = requests.post(
-                url=self.hb_url, data="{}", headers=self.headers.copy()
-            )
+            try:
+                response = requests.post(
+                    url=self.hb_url, data="{}", headers=self.headers.copy()
+                )
+            except requests.exceptions.ConnectionError as e:
+                raise HeartBeatException(
+                    "HeartBeat request (%s) failed" " (ConnectionError)" % (self.hb_url)
+                )
+            except requests.exceptions.Timeout as e:
+                raise HeartBeatException(
+                    "HeartBeat request (%s) failed" " (Timeout)" % (self.hb_url)
+                )
+            except requests.exceptions.RequestException as e:
+                raise HeartBeatException(
+                    "HeartBeat request (%s) failed"
+                    " (RequestException) %s" % (self.hb_url, str(e))
+                )
             # Unfortunately, response.json() returns a string that we need
             # to cast to json; however when the request encounters an error
             # the return type is a json blob :/
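Two things change in the heartbeat sidecar above: failed heartbeats now sleep 1.5**retry_counter seconds between attempts, and every requests failure is translated into HeartBeatException so the retry loop only has to handle one exception type. A rough sketch of that pattern, using hypothetical names (post_heartbeat, run_heartbeat) rather than the actual methods:

import time
import requests


class HeartBeatException(Exception):
    """Raised when a single heartbeat attempt fails for any reason."""


def post_heartbeat(url, headers):
    # Normalize transport-level failures into one exception type so the
    # calling retry loop needs exactly one except clause.
    try:
        return requests.post(url=url, data="{}", headers=headers)
    except requests.exceptions.RequestException as e:
        raise HeartBeatException("HeartBeat request (%s) failed: %s" % (url, e))


def run_heartbeat(url, headers, interval=10):
    retry_counter = 0
    while True:
        try:
            post_heartbeat(url, headers)
            retry_counter = 0               # a success resets the backoff
            time.sleep(interval)
        except HeartBeatException:
            retry_counter += 1
            time.sleep(1.5**retry_counter)  # 1.5s, 2.25s, 3.4s, ...
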
metaflow/metaflow_config.py
CHANGED

@@ -356,6 +356,8 @@ KUBERNETES_PERSISTENT_VOLUME_CLAIMS = from_conf(
 KUBERNETES_SECRETS = from_conf("KUBERNETES_SECRETS", "")
 # Default labels for kubernetes pods
 KUBERNETES_LABELS = from_conf("KUBERNETES_LABELS", "")
+# Default annotations for kubernetes pods
+KUBERNETES_ANNOTATIONS = from_conf("KUBERNETES_ANNOTATIONS", "")
 # Default GPU vendor to use by K8S jobs created by Metaflow (supports nvidia, amd)
 KUBERNETES_GPU_VENDOR = from_conf("KUBERNETES_GPU_VENDOR", "nvidia")
 # Default container image for K8S
@@ -511,6 +513,11 @@ MAX_CPU_PER_TASK = from_conf("MAX_CPU_PER_TASK")
 # lexicographic ordering of attempts. This won't work if MAX_ATTEMPTS > 99.
 MAX_ATTEMPTS = 6

+# Feature flag (experimental features that are *explicitly* unsupported)
+
+# Process configs even when using the click_api for Runner/Deployer
+CLICK_API_PROCESS_CONFIG = from_conf("CLICK_API_PROCESS_CONFIG", False)
+

 # PINNED_CONDA_LIBS are the libraries that metaflow depends on for execution
 # and are needed within a conda environment
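KUBERNETES_ANNOTATIONS mirrors the existing KUBERNETES_LABELS setting: a deploy-wide default set of pod annotations read through from_conf. By analogy with how the labels setting is usually supplied, the value is presumably a comma-separated key=value string; the helper below (parse_keyvalue_list) is a hypothetical stand-in for the parsing that the kube_utils.py changes in this release actually perform, shown only to illustrate the idea.

def parse_keyvalue_list(raw):
    # "team=ml,env=prod" -> {"team": "ml", "env": "prod"}
    result = {}
    for item in (raw or "").split(","):
        item = item.strip()
        if not item:
            continue
        key, _, value = item.partition("=")
        result[key.strip()] = value.strip()
    return result


# Example: default annotations coming from the Metaflow config/environment.
annotations = parse_keyvalue_list("metaflow.org/team=ml,metaflow.org/env=prod")
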
metaflow/parameters.py
CHANGED

@@ -359,7 +359,7 @@ class Parameter(object):
             "show_default": show_default,
         }

-    def init(self):
+    def init(self, ignore_errors=False):
         # Prevent circular import
         from .user_configs.config_parameters import (
             resolve_delayed_evaluator,
@@ -367,14 +367,21 @@ class Parameter(object):
         )

         # Resolve any value from configurations
-        self.kwargs = unpack_delayed_evaluator(self.kwargs)
-        self.kwargs = resolve_delayed_evaluator(self.kwargs)
+        self.kwargs = unpack_delayed_evaluator(self.kwargs, ignore_errors=ignore_errors)
+        # Do it one item at a time so errors are ignored at that level (as opposed to
+        # at the entire kwargs leve)
+        self.kwargs = {
+            k: resolve_delayed_evaluator(v, ignore_errors=ignore_errors)
+            for k, v in self.kwargs.items()
+        }

         # This was the behavior before configs: values specified in args would override
         # stuff in kwargs which is what we implement here as well
         for key, value in self._override_kwargs.items():
             if value is not None:
-                self.kwargs[key] =
+                self.kwargs[key] = resolve_delayed_evaluator(
+                    value, ignore_errors=ignore_errors
+                )
         # Set two default values if no-one specified them
         self.kwargs.setdefault("required", False)
         self.kwargs.setdefault("show_default", True)
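Parameter.init() now accepts ignore_errors and resolves each kwarg independently, so a single configuration value that fails to evaluate no longer invalidates the whole kwargs dict. A toy illustration of the difference, where resolve is only a stand-in for resolve_delayed_evaluator:

def resolve(value, ignore_errors=False):
    # Stand-in for resolve_delayed_evaluator: evaluate deferred values,
    # optionally swallowing failures instead of propagating them.
    try:
        return value() if callable(value) else value
    except Exception:
        if ignore_errors:
            return None
        raise


def broken():
    raise KeyError("missing config value")


kwargs = {"default": lambda: 42, "help": broken}

# Resolving the dict as a single unit would fail outright because of "help".
# Resolving one item at a time keeps the good values and drops only the bad one.
resolved = {k: resolve(v, ignore_errors=True) for k, v in kwargs.items()}
print(resolved)  # {'default': 42, 'help': None}
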
metaflow/plugins/argo/argo_workflows.py
CHANGED

@@ -7,12 +7,11 @@ import sys
 from collections import defaultdict
 from hashlib import sha1
 from math import inf
-from typing import List, Tuple

 from metaflow import JSONType, current
 from metaflow.decorators import flow_decorators
 from metaflow.exception import MetaflowException
-from metaflow.graph import DAGNode, FlowGraph
+from metaflow.graph import FlowGraph
 from metaflow.includefile import FilePathClass
 from metaflow.metaflow_config import (
     ARGO_EVENTS_EVENT,
@@ -39,9 +38,7 @@ from metaflow.metaflow_config import (
     DEFAULT_SECRETS_BACKEND_TYPE,
     GCP_SECRET_MANAGER_PREFIX,
     KUBERNETES_FETCH_EC2_METADATA,
-    KUBERNETES_LABELS,
     KUBERNETES_NAMESPACE,
-    KUBERNETES_NODE_SELECTOR,
     KUBERNETES_SANDBOX_INIT_SCRIPT,
     KUBERNETES_SECRETS,
     S3_ENDPOINT_URL,
@@ -55,10 +52,7 @@ from metaflow.metaflow_config_funcs import config_values, init_config
 from metaflow.mflog import BASH_SAVE_LOGS, bash_capture_logs, export_mflog_env_vars
 from metaflow.parameters import deploy_time_eval
 from metaflow.plugins.kubernetes.kube_utils import qos_requests_and_limits
-
-    parse_kube_keyvalue_list,
-    validate_kube_labels,
-)
+
 from metaflow.plugins.kubernetes.kubernetes_jobsets import KubernetesArgoJobSet
 from metaflow.unbounded_foreach import UBF_CONTROL, UBF_TASK
 from metaflow.user_configs.config_options import ConfigInput
@@ -174,7 +168,8 @@ class ArgoWorkflows(object):
         self.triggers, self.trigger_options = self._process_triggers()
         self._schedule, self._timezone = self._get_schedule()

-        self.kubernetes_labels = self._get_kubernetes_labels()
+        self._base_labels = self._base_kubernetes_labels()
+        self._base_annotations = self._base_kubernetes_annotations()
         self._workflow_template = self._compile_workflow_template()
         self._sensor = self._compile_sensor()

@@ -311,7 +306,7 @@ class ArgoWorkflows(object):
         try:
             # Check that the workflow was deployed through Metaflow
             workflow_template["metadata"]["annotations"]["metaflow/owner"]
-        except KeyError as e:
+        except KeyError:
             raise ArgoWorkflowsException(
                 "An existing non-metaflow workflow with the same name as "
                 "*%s* already exists in Argo Workflows. \nPlease modify the "
@@ -325,18 +320,42 @@ class ArgoWorkflows(object):
         except Exception as e:
             raise ArgoWorkflowsException(str(e))

-
-    def _get_kubernetes_labels():
+    def _base_kubernetes_labels(self):
         """
-        Get Kubernetes labels
-        Parses the string into a dict and validates that values adhere to Kubernetes restrictions.
+        Get shared Kubernetes labels for Argo resources.
         """
-        if
-
-
-
-
-
+        # TODO: Add configuration through an environment variable or Metaflow config in the future if required.
+        labels = {"app.kubernetes.io/part-of": "metaflow"}
+
+        return labels
+
+    def _base_kubernetes_annotations(self):
+        """
+        Get shared Kubernetes annotations for Argo resources.
+        """
+        from datetime import datetime, timezone
+
+        # TODO: Add configuration through an environment variable or Metaflow config in the future if required.
+        # base annotations
+        annotations = {
+            "metaflow/production_token": self.production_token,
+            "metaflow/owner": self.username,
+            "metaflow/user": "argo-workflows",
+            "metaflow/flow_name": self.flow.name,
+            "metaflow/deployment_timestamp": str(
+                datetime.now(timezone.utc).isoformat()
+            ),
+        }
+
+        if current.get("project_name"):
+            annotations.update(
+                {
+                    "metaflow/project_name": current.project_name,
+                    "metaflow/branch_name": current.branch_name,
+                    "metaflow/project_flow_name": current.project_flow_name,
+                }
+            )
+        return annotations

     def _get_schedule(self):
         schedule = self.flow._flow_decorators.get("schedule")
@@ -412,7 +431,7 @@ class ArgoWorkflows(object):
                     "metaflow/production_token"
                 ],
             )
-        except KeyError as e:
+        except KeyError:
             raise ArgoWorkflowsException(
                 "An existing non-metaflow workflow with the same name as "
                 "*%s* already exists in Argo Workflows. \nPlease modify the "
@@ -677,18 +696,7 @@ class ArgoWorkflows(object):
        # generate container templates at the top level (in WorkflowSpec) and maintain
        # references to them within the DAGTask.

-
-
-        annotations = {
-            "metaflow/production_token": self.production_token,
-            "metaflow/owner": self.username,
-            "metaflow/user": "argo-workflows",
-            "metaflow/flow_name": self.flow.name,
-            "metaflow/deployment_timestamp": str(
-                datetime.now(timezone.utc).isoformat()
-            ),
-        }
-
+        annotations = {}
         if self._schedule is not None:
             # timezone is an optional field and json dumps on None will result in null
             # hence configuring it to an empty string
@@ -700,15 +708,6 @@ class ArgoWorkflows(object):
         if self.parameters:
             annotations.update({"metaflow/parameters": json.dumps(self.parameters)})

-        if current.get("project_name"):
-            annotations.update(
-                {
-                    "metaflow/project_name": current.project_name,
-                    "metaflow/branch_name": current.branch_name,
-                    "metaflow/project_flow_name": current.project_flow_name,
-                }
-            )
-
         # Some more annotations to populate the Argo UI nicely
         if self.tags:
             annotations.update({"metaflow/tags": json.dumps(self.tags)})
@@ -756,9 +755,10 @@ class ArgoWorkflows(object):
                 # is released, we should be able to support multi-namespace /
                 # multi-cluster scheduling.
                 .namespace(KUBERNETES_NAMESPACE)
-                .label("app.kubernetes.io/name", "metaflow-flow")
-                .label("app.kubernetes.io/part-of", "metaflow")
                 .annotations(annotations)
+                .annotations(self._base_annotations)
+                .labels(self._base_labels)
+                .label("app.kubernetes.io/name", "metaflow-flow")
             )
             .spec(
                 WorkflowSpec()
@@ -788,10 +788,14 @@ class ArgoWorkflows(object):
                 # Set workflow metadata
                 .workflow_metadata(
                     Metadata()
+                    .labels(self._base_labels)
                     .label("app.kubernetes.io/name", "metaflow-run")
-                    .label("app.kubernetes.io/part-of", "metaflow")
                     .annotations(
-                        {
+                        {
+                            **annotations,
+                            **self._base_annotations,
+                            **{"metaflow/run_id": "argo-{{workflow.name}}"},
+                        }
                     )
                 # TODO: Set dynamic labels using labels_from. Ideally, we would
                 # want to expose run_id as a label. It's easy to add labels,
@@ -824,10 +828,10 @@ class ArgoWorkflows(object):
                 # Set common pod metadata.
                 .pod_metadata(
                     Metadata()
+                    .labels(self._base_labels)
                     .label("app.kubernetes.io/name", "metaflow-task")
-                    .label("app.kubernetes.io/part-of", "metaflow")
                     .annotations(annotations)
-                    .
+                    .annotations(self._base_annotations)
                 )
                 # Set the entrypoint to flow name
                 .entrypoint(self.flow.name)
@@ -1911,15 +1915,7 @@ class ArgoWorkflows(object):
             # twice, but due to issues with variable substitution, we will have to
             # live with this routine.
             if node.parallel_step:
-                # Explicitly add the task-id-hint label. This is important because this label
-                # is returned as an Output parameter of this step and is used subsequently as an
-                # an input in the join step.
-                kubernetes_labels = self.kubernetes_labels.copy()
                 jobset_name = "{{inputs.parameters.jobset-name}}"
-                kubernetes_labels["task_id_entropy"] = (
-                    "{{inputs.parameters.task-id-entropy}}"
-                )
-                kubernetes_labels["num_parallel"] = "{{inputs.parameters.num-parallel}}"
                 jobset = KubernetesArgoJobSet(
                     kubernetes_sdk=kubernetes_sdk,
                     name=jobset_name,
@@ -1975,8 +1971,22 @@ class ArgoWorkflows(object):
                 for k, v in env.items():
                     jobset.environment_variable(k, v)

-
-
+                # Set labels. Do not allow user-specified task labels to override internal ones.
+                #
+                # Explicitly add the task-id-hint label. This is important because this label
+                # is returned as an Output parameter of this step and is used subsequently as an
+                # an input in the join step.
+                kubernetes_labels = {
+                    "task_id_entropy": "{{inputs.parameters.task-id-entropy}}",
+                    "num_parallel": "{{inputs.parameters.num-parallel}}",
+                }
+                jobset.labels(
+                    {
+                        **resources["labels"],
+                        **self._base_labels,
+                        **kubernetes_labels,
+                    }
+                )

                 jobset.environment_variable(
                     "MF_MASTER_ADDR", jobset.jobset_control_addr
@@ -2005,27 +2015,23 @@ class ArgoWorkflows(object):
                         "TASK_ID_SUFFIX": "metadata.annotations['jobset.sigs.k8s.io/job-index']",
                     }
                 )
+
+                # Set annotations. Do not allow user-specified task-specific annotations to override internal ones.
                 annotations = {
                     # setting annotations explicitly as they wont be
                     # passed down from WorkflowTemplate level
                     "metaflow/step_name": node.name,
                     "metaflow/attempt": str(retry_count),
                     "metaflow/run_id": run_id,
-                    "metaflow/production_token": self.production_token,
-                    "metaflow/owner": self.username,
-                    "metaflow/user": "argo-workflows",
-                    "metaflow/flow_name": self.flow.name,
                 }
-
-
-
-
-
-
-
-
-                for k, v in annotations.items():
-                    jobset.annotation(k, v)
+
+                jobset.annotations(
+                    {
+                        **resources["annotations"],
+                        **self._base_annotations,
+                        **annotations,
+                    }
+                )

                 jobset.control.replicas(1)
                 jobset.worker.replicas("{{=asInt(inputs.parameters.workerCount)}}")
@@ -2082,13 +2088,16 @@ class ArgoWorkflows(object):
                     minutes_between_retries=minutes_between_retries,
                 )
                 .metadata(
-                    ObjectMeta()
+                    ObjectMeta()
+                    .annotation("metaflow/step_name", node.name)
                     # Unfortunately, we can't set the task_id since it is generated
                     # inside the pod. However, it can be inferred from the annotation
                     # set by argo-workflows - `workflows.argoproj.io/outputs` - refer
                     # the field 'task-id' in 'parameters'
                     # .annotation("metaflow/task_id", ...)
                     .annotation("metaflow/attempt", retry_count)
+                    .annotations(resources["annotations"])
+                    .labels(resources["labels"])
                 )
                 # Set emptyDir volume for state management
                 .empty_dir_volume("out")
@@ -2871,33 +2880,6 @@ class ArgoWorkflows(object):
                 "sdk (https://pypi.org/project/kubernetes/) first."
             )

-        labels = {"app.kubernetes.io/part-of": "metaflow"}
-
-        annotations = {
-            "metaflow/production_token": self.production_token,
-            "metaflow/owner": self.username,
-            "metaflow/user": "argo-workflows",
-            "metaflow/flow_name": self.flow.name,
-        }
-        if current.get("project_name"):
-            annotations.update(
-                {
-                    "metaflow/project_name": current.project_name,
-                    "metaflow/branch_name": current.branch_name,
-                    "metaflow/project_flow_name": current.project_flow_name,
-                }
-            )
-
-        # Useful to paint the UI
-        trigger_annotations = {
-            "metaflow/triggered_by": json.dumps(
-                [
-                    {key: trigger.get(key) for key in ["name", "type"]}
-                    for trigger in self.triggers
-                ]
-            )
-        }
-
         return (
             Sensor()
             .metadata(
@@ -2905,10 +2887,9 @@ class ArgoWorkflows(object):
                 ObjectMeta()
                 .name(ArgoWorkflows._sensor_name(self.name))
                 .namespace(KUBERNETES_NAMESPACE)
+                .labels(self._base_labels)
                 .label("app.kubernetes.io/name", "metaflow-sensor")
-                .
-                .labels(self.kubernetes_labels)
-                .annotations(annotations)
+                .annotations(self._base_annotations)
             )
             .spec(
                 SensorSpec().template(
@@ -2918,7 +2899,7 @@ class ArgoWorkflows(object):
                     ObjectMeta()
                     .label("app.kubernetes.io/name", "metaflow-sensor")
                     .label("app.kubernetes.io/part-of", "metaflow")
-                    .annotations(
+                    .annotations(self._base_annotations)
                 )
                 .container(
                     # Run sensor in guaranteed QoS. The sensor isn't doing a lot
@@ -2965,6 +2946,7 @@ class ArgoWorkflows(object):
                         "metadata": {
                             "generateName": "%s-" % self.name,
                             "namespace": KUBERNETES_NAMESPACE,
+                            # Useful to paint the UI
                             "annotations": {
                                 "metaflow/triggered_by": json.dumps(
                                     [
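A pattern worth noting in the hunks above is how the new code composes labels and annotations: resource-level values, the shared base set, and the internally required keys are merged with dict unpacking, so later sources win on key collisions and user-supplied values cannot override the internal ones. A small illustration of that precedence rule with plain dicts (the key names below are made up for the example):

resource_labels = {"team": "ml", "task_id_entropy": "user-override"}   # user-specified
base_labels = {"app.kubernetes.io/part-of": "metaflow"}                # shared base set
internal_labels = {"task_id_entropy": "{{inputs.parameters.task-id-entropy}}"}

# Later unpacks win: internal_labels overrides anything the user set for the
# same key, while unrelated user labels pass through untouched.
merged = {**resource_labels, **base_labels, **internal_labels}
print(merged["task_id_entropy"])  # -> "{{inputs.parameters.task-id-entropy}}"
print(merged["team"])             # -> "ml"
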
metaflow/plugins/argo/argo_workflows_decorator.py
CHANGED

@@ -1,14 +1,12 @@
 import json
 import os
-import time


 from metaflow import current
 from metaflow.decorators import StepDecorator
 from metaflow.events import Trigger
 from metaflow.metadata_provider import MetaDatum
-from metaflow.
-from metaflow.graph import DAGNode, FlowGraph
+from metaflow.graph import FlowGraph
 from metaflow.flowspec import FlowSpec
 from .argo_events import ArgoEvent

@@ -42,7 +40,7 @@ class ArgoWorkflowsInternalDecorator(StepDecorator):
         if payload != "null":  # Argo-Workflow's None
             try:
                 payload = json.loads(payload)
-            except (TypeError, ValueError) as e:
+            except (TypeError, ValueError):
                 # There could be arbitrary events that Metaflow doesn't know of
                 payload = {}
             triggers.append(