ob-metaflow 2.13.0.1__py2.py3-none-any.whl → 2.13.2.1__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaflow/extension_support/plugins.py +1 -0
- metaflow/metadata_provider/heartbeat.py +23 -8
- metaflow/metaflow_config.py +2 -0
- metaflow/parameters.py +1 -1
- metaflow/plugins/__init__.py +13 -0
- metaflow/plugins/argo/argo_client.py +0 -2
- metaflow/plugins/argo/argo_workflows.py +98 -104
- metaflow/plugins/argo/argo_workflows_cli.py +0 -1
- metaflow/plugins/argo/argo_workflows_decorator.py +2 -4
- metaflow/plugins/argo/jobset_input_paths.py +0 -1
- metaflow/plugins/aws/aws_utils.py +6 -1
- metaflow/plugins/aws/batch/batch_client.py +1 -3
- metaflow/plugins/aws/batch/batch_decorator.py +11 -11
- metaflow/plugins/aws/step_functions/dynamo_db_client.py +0 -3
- metaflow/plugins/aws/step_functions/production_token.py +1 -1
- metaflow/plugins/aws/step_functions/step_functions.py +1 -1
- metaflow/plugins/aws/step_functions/step_functions_cli.py +0 -1
- metaflow/plugins/aws/step_functions/step_functions_decorator.py +0 -1
- metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +0 -1
- metaflow/plugins/kubernetes/kube_utils.py +55 -1
- metaflow/plugins/kubernetes/kubernetes.py +33 -80
- metaflow/plugins/kubernetes/kubernetes_cli.py +22 -5
- metaflow/plugins/kubernetes/kubernetes_decorator.py +49 -2
- metaflow/plugins/kubernetes/kubernetes_job.py +3 -6
- metaflow/plugins/kubernetes/kubernetes_jobsets.py +22 -5
- metaflow/plugins/pypi/bootstrap.py +87 -54
- metaflow/plugins/pypi/conda_environment.py +7 -6
- metaflow/plugins/pypi/micromamba.py +40 -22
- metaflow/plugins/pypi/pip.py +2 -4
- metaflow/plugins/pypi/utils.py +4 -2
- metaflow/runner/metaflow_runner.py +25 -3
- metaflow/runtime.py +18 -8
- metaflow/tracing/tracing_modules.py +4 -1
- metaflow/user_configs/config_parameters.py +23 -6
- metaflow/version.py +1 -1
- {ob_metaflow-2.13.0.1.dist-info → ob_metaflow-2.13.2.1.dist-info}/METADATA +2 -2
- {ob_metaflow-2.13.0.1.dist-info → ob_metaflow-2.13.2.1.dist-info}/RECORD +41 -41
- {ob_metaflow-2.13.0.1.dist-info → ob_metaflow-2.13.2.1.dist-info}/WHEEL +1 -1
- {ob_metaflow-2.13.0.1.dist-info → ob_metaflow-2.13.2.1.dist-info}/LICENSE +0 -0
- {ob_metaflow-2.13.0.1.dist-info → ob_metaflow-2.13.2.1.dist-info}/entry_points.txt +0 -0
- {ob_metaflow-2.13.0.1.dist-info → ob_metaflow-2.13.2.1.dist-info}/top_level.txt +0 -0
metaflow/metadata_provider/heartbeat.py
CHANGED

@@ -1,11 +1,12 @@
+import json
 import time
+from threading import Thread
+
 import requests
-import json

-from threading import Thread
-from metaflow.sidecar import MessageTypes, Message
-from metaflow.metaflow_config import SERVICE_HEADERS
 from metaflow.exception import MetaflowException
+from metaflow.metaflow_config import SERVICE_HEADERS
+from metaflow.sidecar import Message, MessageTypes

 HB_URL_KEY = "hb_url"

@@ -52,13 +53,27 @@ class MetadataHeartBeat(object):
                 retry_counter = 0
             except HeartBeatException as e:
                 retry_counter = retry_counter + 1
-                time.sleep(
+                time.sleep(1.5**retry_counter)

     def _heartbeat(self):
         if self.hb_url is not None:
-            response = requests.post(
-                url=self.hb_url, data="{}", headers=self.headers.copy()
-            )
+            try:
+                response = requests.post(
+                    url=self.hb_url, data="{}", headers=self.headers.copy()
+                )
+            except requests.exceptions.ConnectionError as e:
+                raise HeartBeatException(
+                    "HeartBeat request (%s) failed" " (ConnectionError)" % (self.hb_url)
+                )
+            except requests.exceptions.Timeout as e:
+                raise HeartBeatException(
+                    "HeartBeat request (%s) failed" " (Timeout)" % (self.hb_url)
+                )
+            except requests.exceptions.RequestException as e:
+                raise HeartBeatException(
+                    "HeartBeat request (%s) failed"
+                    " (RequestException) %s" % (self.hb_url, str(e))
+                )
             # Unfortunately, response.json() returns a string that we need
             # to cast to json; however when the request encounters an error
             # the return type is a json blob :/
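For orientation: the hunk above converts every requests-level failure into the sidecar's own HeartBeatException, which the retry loop then handles with the 1.5**retry_counter backoff. A minimal, self-contained sketch of the same wrapping pattern (names mirror the diff; this is not the packaged code):

import requests

class HeartBeatException(Exception):
    pass

def post_heartbeat(hb_url, headers):
    # Convert transport-level errors into a single domain exception, so the
    # caller's retry loop only needs to catch HeartBeatException.
    try:
        return requests.post(url=hb_url, data="{}", headers=dict(headers))
    except requests.exceptions.ConnectionError:
        raise HeartBeatException("HeartBeat request (%s) failed (ConnectionError)" % hb_url)
    except requests.exceptions.Timeout:
        raise HeartBeatException("HeartBeat request (%s) failed (Timeout)" % hb_url)
    except requests.exceptions.RequestException as e:
        raise HeartBeatException("HeartBeat request (%s) failed (RequestException) %s" % (hb_url, str(e)))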
metaflow/metaflow_config.py
CHANGED

@@ -356,6 +356,8 @@ KUBERNETES_PERSISTENT_VOLUME_CLAIMS = from_conf(
 KUBERNETES_SECRETS = from_conf("KUBERNETES_SECRETS", "")
 # Default labels for kubernetes pods
 KUBERNETES_LABELS = from_conf("KUBERNETES_LABELS", "")
+# Default annotations for kubernetes pods
+KUBERNETES_ANNOTATIONS = from_conf("KUBERNETES_ANNOTATIONS", "")
 # Default GPU vendor to use by K8S jobs created by Metaflow (supports nvidia, amd)
 KUBERNETES_GPU_VENDOR = from_conf("KUBERNETES_GPU_VENDOR", "nvidia")
 # Default container image for K8S
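KUBERNETES_ANNOTATIONS appears to follow the same comma-separated key=value convention as KUBERNETES_LABELS. A hypothetical sketch of splitting such a string into a dict (Metaflow's actual parsing lives in metaflow.plugins.kubernetes.kube_utils, e.g. parse_kube_keyvalue_list, per the import changes elsewhere in this diff):

def parse_keyvalue_list(items):
    # "team=ml" -> {"team": "ml"}; values may be empty ("flag=" -> {"flag": ""}).
    result = {}
    for item in items:
        key, _, value = item.partition("=")
        result[key.strip()] = value.strip()
    return result

print(parse_keyvalue_list("team=ml,env=prod".split(",")))
# {'team': 'ml', 'env': 'prod'}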
metaflow/parameters.py
CHANGED

@@ -369,7 +369,7 @@ class Parameter(object):
         # Resolve any value from configurations
         self.kwargs = unpack_delayed_evaluator(self.kwargs, ignore_errors=ignore_errors)
         # Do it one item at a time so errors are ignored at that level (as opposed to
-        # at the entire kwargs
+        # at the entire kwargs level)
         self.kwargs = {
             k: resolve_delayed_evaluator(v, ignore_errors=ignore_errors)
             for k, v in self.kwargs.items()
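The comment fix above documents real behavior: each kwarg goes through its own resolve_delayed_evaluator(..., ignore_errors=...) call, so a failure affects only that item rather than the whole dict. A generic, hypothetical illustration of the per-item pattern (resolve_each is not Metaflow code):

def resolve_each(kwargs, resolve, ignore_errors=True):
    # Resolve values one at a time; with ignore_errors, a bad value is kept
    # as-is instead of discarding every other successfully resolved entry.
    out = {}
    for k, v in kwargs.items():
        try:
            out[k] = resolve(v)
        except Exception:
            if not ignore_errors:
                raise
            out[k] = v
    return out

print(resolve_each({"a": "1", "b": "x"}, int))  # {'a': 1, 'b': 'x'}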
metaflow/plugins/__init__.py
CHANGED

@@ -19,6 +19,11 @@ CLIS_DESC = [
     ("logs", ".logs_cli.cli"),
 ]

+# Add additional commands to the runner here
+# These will be accessed using Runner().<command>()
+RUNNER_CLIS_DESC = []
+
+
 from .test_unbounded_foreach_decorator import InternalTestUnboundedForeachInput

 # Add new step decorators here

@@ -168,6 +173,14 @@ def get_plugin_cli_path():
     return resolve_plugins("cli", path_only=True)


+def get_runner_cli():
+    return resolve_plugins("runner_cli")
+
+
+def get_runner_cli_path():
+    return resolve_plugins("runner_cli", path_only=True)
+
+
 STEP_DECORATORS = resolve_plugins("step_decorator")
 FLOW_DECORATORS = resolve_plugins("flow_decorator")
 ENVIRONMENTS = resolve_plugins("environment")
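Both hunks above extend the existing plugin mechanism: a command is declared as a (name, module_path) descriptor such as ("logs", ".logs_cli.cli"), and resolve_plugins turns descriptors into loaded objects that, per the new comment, become accessible as Runner().<command>(). A self-contained sketch of the core descriptor-resolution idea (resolve_plugins itself does more, e.g. extension overrides and path_only mode):

import importlib

def resolve(descriptors):
    # Each descriptor maps a command name to a dotted "module.attr" path
    # that is imported lazily.
    resolved = {}
    for name, path in descriptors:
        module_path, _, attr = path.rpartition(".")
        module = importlib.import_module(module_path)
        resolved[name] = getattr(module, attr)
    return resolved

loads = resolve([("loads", "json.loads")])["loads"]
print(loads('{"a": 1}'))  # {'a': 1}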
metaflow/plugins/argo/argo_workflows.py
CHANGED

@@ -7,12 +7,11 @@ import sys
 from collections import defaultdict
 from hashlib import sha1
 from math import inf
-from typing import List, Tuple

 from metaflow import JSONType, current
 from metaflow.decorators import flow_decorators
 from metaflow.exception import MetaflowException
-from metaflow.graph import
+from metaflow.graph import FlowGraph
 from metaflow.includefile import FilePathClass
 from metaflow.metaflow_config import (
     ARGO_EVENTS_EVENT,
@@ -39,9 +38,7 @@ from metaflow.metaflow_config import (
     DEFAULT_SECRETS_BACKEND_TYPE,
     GCP_SECRET_MANAGER_PREFIX,
     KUBERNETES_FETCH_EC2_METADATA,
-    KUBERNETES_LABELS,
     KUBERNETES_NAMESPACE,
-    KUBERNETES_NODE_SELECTOR,
     KUBERNETES_SANDBOX_INIT_SCRIPT,
     KUBERNETES_SECRETS,
     S3_ENDPOINT_URL,
@@ -55,10 +52,7 @@ from metaflow.metaflow_config_funcs import config_values, init_config
 from metaflow.mflog import BASH_SAVE_LOGS, bash_capture_logs, export_mflog_env_vars
 from metaflow.parameters import deploy_time_eval
 from metaflow.plugins.kubernetes.kube_utils import qos_requests_and_limits
-
-    parse_kube_keyvalue_list,
-    validate_kube_labels,
-)
+
 from metaflow.plugins.kubernetes.kubernetes_jobsets import KubernetesArgoJobSet
 from metaflow.unbounded_foreach import UBF_CONTROL, UBF_TASK
 from metaflow.user_configs.config_options import ConfigInput
@@ -174,7 +168,8 @@ class ArgoWorkflows(object):
         self.triggers, self.trigger_options = self._process_triggers()
         self._schedule, self._timezone = self._get_schedule()

-        self.
+        self._base_labels = self._base_kubernetes_labels()
+        self._base_annotations = self._base_kubernetes_annotations()
         self._workflow_template = self._compile_workflow_template()
         self._sensor = self._compile_sensor()

@@ -311,7 +306,7 @@ class ArgoWorkflows(object):
         try:
             # Check that the workflow was deployed through Metaflow
             workflow_template["metadata"]["annotations"]["metaflow/owner"]
-        except KeyError
+        except KeyError:
             raise ArgoWorkflowsException(
                 "An existing non-metaflow workflow with the same name as "
                 "*%s* already exists in Argo Workflows. \nPlease modify the "
@@ -325,18 +320,42 @@ class ArgoWorkflows(object):
         except Exception as e:
             raise ArgoWorkflowsException(str(e))

-    @staticmethod
-    def _get_kubernetes_labels():
+    def _base_kubernetes_labels(self):
         """
-        Get Kubernetes labels
-        Parses the string into a dict and validates that values adhere to Kubernetes restrictions.
+        Get shared Kubernetes labels for Argo resources.
         """
-        if not KUBERNETES_LABELS:
-            return {}
-        env_labels = KUBERNETES_LABELS.split(",")
-        env_labels = parse_kube_keyvalue_list(env_labels, False)
-        validate_kube_labels(env_labels)
-        return env_labels
+        # TODO: Add configuration through an environment variable or Metaflow config in the future if required.
+        labels = {"app.kubernetes.io/part-of": "metaflow"}
+
+        return labels
+
+    def _base_kubernetes_annotations(self):
+        """
+        Get shared Kubernetes annotations for Argo resources.
+        """
+        from datetime import datetime, timezone
+
+        # TODO: Add configuration through an environment variable or Metaflow config in the future if required.
+        # base annotations
+        annotations = {
+            "metaflow/production_token": self.production_token,
+            "metaflow/owner": self.username,
+            "metaflow/user": "argo-workflows",
+            "metaflow/flow_name": self.flow.name,
+            "metaflow/deployment_timestamp": str(
+                datetime.now(timezone.utc).isoformat()
+            ),
+        }
+
+        if current.get("project_name"):
+            annotations.update(
+                {
+                    "metaflow/project_name": current.project_name,
+                    "metaflow/branch_name": current.branch_name,
+                    "metaflow/project_flow_name": current.project_flow_name,
+                }
+            )
+        return annotations

     def _get_schedule(self):
         schedule = self.flow._flow_decorators.get("schedule")
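The deployment timestamp recorded by _base_kubernetes_annotations is a timezone-aware ISO-8601 string; a quick check of what that value looks like (same expression as in the diff):

from datetime import datetime, timezone

# Yields e.g. "2025-01-01T12:00:00.123456+00:00".
print(str(datetime.now(timezone.utc).isoformat()))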
@@ -412,7 +431,7 @@ class ArgoWorkflows(object):
                     "metaflow/production_token"
                 ],
             )
-        except KeyError
+        except KeyError:
             raise ArgoWorkflowsException(
                 "An existing non-metaflow workflow with the same name as "
                 "*%s* already exists in Argo Workflows. \nPlease modify the "
@@ -677,18 +696,7 @@ class ArgoWorkflows(object):
         # generate container templates at the top level (in WorkflowSpec) and maintain
         # references to them within the DAGTask.

-
-
-        annotations = {
-            "metaflow/production_token": self.production_token,
-            "metaflow/owner": self.username,
-            "metaflow/user": "argo-workflows",
-            "metaflow/flow_name": self.flow.name,
-            "metaflow/deployment_timestamp": str(
-                datetime.now(timezone.utc).isoformat()
-            ),
-        }
-
+        annotations = {}
         if self._schedule is not None:
             # timezone is an optional field and json dumps on None will result in null
             # hence configuring it to an empty string
@@ -700,15 +708,6 @@ class ArgoWorkflows(object):
         if self.parameters:
             annotations.update({"metaflow/parameters": json.dumps(self.parameters)})

-        if current.get("project_name"):
-            annotations.update(
-                {
-                    "metaflow/project_name": current.project_name,
-                    "metaflow/branch_name": current.branch_name,
-                    "metaflow/project_flow_name": current.project_flow_name,
-                }
-            )
-
         # Some more annotations to populate the Argo UI nicely
         if self.tags:
             annotations.update({"metaflow/tags": json.dumps(self.tags)})
@@ -745,6 +744,17 @@ class ArgoWorkflows(object):
                 )
             }
         )
+        try:
+            # Build the DAG based on the DAGNodes given by the FlowGraph for the found FlowSpec class.
+            _steps_info, graph_structure = self.graph.output_steps()
+            graph_info = {
+                # for the time being, we only need the graph_structure. Being mindful of annotation size limits we do not include anything extra.
+                "graph_structure": graph_structure
+            }
+        except Exception:
+            graph_info = None
+
+        dag_annotation = {"metaflow/dag": json.dumps(graph_info)}

         return (
             WorkflowTemplate()
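Note the failure path above: if output_steps() raises, graph_info is None, and json.dumps(None) still produces valid JSON, so the metaflow/dag annotation is always set (the graph_structure value below is purely illustrative):

import json

print(json.dumps(None))                              # null
print(json.dumps({"graph_structure": [["start"]]}))  # {"graph_structure": [["start"]]}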
@@ -756,9 +766,11 @@ class ArgoWorkflows(object):
             # is released, we should be able to support multi-namespace /
             # multi-cluster scheduling.
             .namespace(KUBERNETES_NAMESPACE)
-            .label("app.kubernetes.io/name", "metaflow-flow")
-            .label("app.kubernetes.io/part-of", "metaflow")
             .annotations(annotations)
+            .annotations(self._base_annotations)
+            .labels(self._base_labels)
+            .label("app.kubernetes.io/name", "metaflow-flow")
+            .annotations(dag_annotation)
         )
         .spec(
             WorkflowSpec()
@@ -788,10 +800,14 @@ class ArgoWorkflows(object):
             # Set workflow metadata
             .workflow_metadata(
                 Metadata()
+                .labels(self._base_labels)
                 .label("app.kubernetes.io/name", "metaflow-run")
-                .label("app.kubernetes.io/part-of", "metaflow")
                 .annotations(
-                    {
+                    {
+                        **annotations,
+                        **self._base_annotations,
+                        **{"metaflow/run_id": "argo-{{workflow.name}}"},
+                    }
                 )
                 # TODO: Set dynamic labels using labels_from. Ideally, we would
                 # want to expose run_id as a label. It's easy to add labels,
@@ -824,10 +840,10 @@ class ArgoWorkflows(object):
             # Set common pod metadata.
             .pod_metadata(
                 Metadata()
+                .labels(self._base_labels)
                 .label("app.kubernetes.io/name", "metaflow-task")
-                .label("app.kubernetes.io/part-of", "metaflow")
                 .annotations(annotations)
-                .
+                .annotations(self._base_annotations)
             )
             # Set the entrypoint to flow name
             .entrypoint(self.flow.name)
@@ -1911,15 +1927,7 @@ class ArgoWorkflows(object):
            # twice, but due to issues with variable substitution, we will have to
            # live with this routine.
            if node.parallel_step:
-                # Explicitly add the task-id-hint label. This is important because this label
-                # is returned as an Output parameter of this step and is used subsequently as an
-                # an input in the join step.
-                kubernetes_labels = self.kubernetes_labels.copy()
                jobset_name = "{{inputs.parameters.jobset-name}}"
-                kubernetes_labels["task_id_entropy"] = (
-                    "{{inputs.parameters.task-id-entropy}}"
-                )
-                kubernetes_labels["num_parallel"] = "{{inputs.parameters.num-parallel}}"
                jobset = KubernetesArgoJobSet(
                    kubernetes_sdk=kubernetes_sdk,
                    name=jobset_name,
@@ -1975,8 +1983,22 @@ class ArgoWorkflows(object):
                for k, v in env.items():
                    jobset.environment_variable(k, v)

-
-
+                # Set labels. Do not allow user-specified task labels to override internal ones.
+                #
+                # Explicitly add the task-id-hint label. This is important because this label
+                # is returned as an Output parameter of this step and is used subsequently as an
+                # an input in the join step.
+                kubernetes_labels = {
+                    "task_id_entropy": "{{inputs.parameters.task-id-entropy}}",
+                    "num_parallel": "{{inputs.parameters.num-parallel}}",
+                }
+                jobset.labels(
+                    {
+                        **resources["labels"],
+                        **self._base_labels,
+                        **kubernetes_labels,
+                    }
+                )

                jobset.environment_variable(
                    "MF_MASTER_ADDR", jobset.jobset_control_addr
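In the merged dict literal above, later keys win, so user-supplied resources["labels"] can never override Metaflow's base or internal labels. A small demonstration of that precedence rule (values are illustrative):

user_labels = {"app.kubernetes.io/part-of": "custom", "team": "ml"}
base_labels = {"app.kubernetes.io/part-of": "metaflow"}
internal_labels = {"task_id_entropy": "abc123"}

merged = {**user_labels, **base_labels, **internal_labels}
print(merged)
# {'app.kubernetes.io/part-of': 'metaflow', 'team': 'ml', 'task_id_entropy': 'abc123'}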
@@ -2005,27 +2027,23 @@ class ArgoWorkflows(object):
                        "TASK_ID_SUFFIX": "metadata.annotations['jobset.sigs.k8s.io/job-index']",
                    }
                )
+
+                # Set annotations. Do not allow user-specified task-specific annotations to override internal ones.
                annotations = {
                    # setting annotations explicitly as they wont be
                    # passed down from WorkflowTemplate level
                    "metaflow/step_name": node.name,
                    "metaflow/attempt": str(retry_count),
                    "metaflow/run_id": run_id,
-                    "metaflow/production_token": self.production_token,
-                    "metaflow/owner": self.username,
-                    "metaflow/user": "argo-workflows",
-                    "metaflow/flow_name": self.flow.name,
                }
-
-
-
-
-
-
-
-
-                for k, v in annotations.items():
-                    jobset.annotation(k, v)
+
+                jobset.annotations(
+                    {
+                        **resources["annotations"],
+                        **self._base_annotations,
+                        **annotations,
+                    }
+                )

                jobset.control.replicas(1)
                jobset.worker.replicas("{{=asInt(inputs.parameters.workerCount)}}")
@@ -2082,13 +2100,16 @@ class ArgoWorkflows(object):
                    minutes_between_retries=minutes_between_retries,
                )
                .metadata(
-                    ObjectMeta()
+                    ObjectMeta()
+                    .annotation("metaflow/step_name", node.name)
                    # Unfortunately, we can't set the task_id since it is generated
                    # inside the pod. However, it can be inferred from the annotation
                    # set by argo-workflows - `workflows.argoproj.io/outputs` - refer
                    # the field 'task-id' in 'parameters'
                    # .annotation("metaflow/task_id", ...)
                    .annotation("metaflow/attempt", retry_count)
+                    .annotations(resources["annotations"])
+                    .labels(resources["labels"])
                )
                # Set emptyDir volume for state management
                .empty_dir_volume("out")
@@ -2871,33 +2892,6 @@ class ArgoWorkflows(object):
            "sdk (https://pypi.org/project/kubernetes/) first."
        )

-        labels = {"app.kubernetes.io/part-of": "metaflow"}
-
-        annotations = {
-            "metaflow/production_token": self.production_token,
-            "metaflow/owner": self.username,
-            "metaflow/user": "argo-workflows",
-            "metaflow/flow_name": self.flow.name,
-        }
-        if current.get("project_name"):
-            annotations.update(
-                {
-                    "metaflow/project_name": current.project_name,
-                    "metaflow/branch_name": current.branch_name,
-                    "metaflow/project_flow_name": current.project_flow_name,
-                }
-            )
-
-        # Useful to paint the UI
-        trigger_annotations = {
-            "metaflow/triggered_by": json.dumps(
-                [
-                    {key: trigger.get(key) for key in ["name", "type"]}
-                    for trigger in self.triggers
-                ]
-            )
-        }
-
        return (
            Sensor()
            .metadata(
@@ -2905,10 +2899,9 @@ class ArgoWorkflows(object):
                ObjectMeta()
                .name(ArgoWorkflows._sensor_name(self.name))
                .namespace(KUBERNETES_NAMESPACE)
+                .labels(self._base_labels)
                .label("app.kubernetes.io/name", "metaflow-sensor")
-                .
-                .labels(self.kubernetes_labels)
-                .annotations(annotations)
+                .annotations(self._base_annotations)
            )
            .spec(
                SensorSpec().template(
@@ -2918,7 +2911,7 @@ class ArgoWorkflows(object):
                    ObjectMeta()
                    .label("app.kubernetes.io/name", "metaflow-sensor")
                    .label("app.kubernetes.io/part-of", "metaflow")
-                    .annotations(
+                    .annotations(self._base_annotations)
                )
                .container(
                    # Run sensor in guaranteed QoS. The sensor isn't doing a lot
@@ -2965,6 +2958,7 @@ class ArgoWorkflows(object):
                            "metadata": {
                                "generateName": "%s-" % self.name,
                                "namespace": KUBERNETES_NAMESPACE,
+                                # Useful to paint the UI
                                "annotations": {
                                    "metaflow/triggered_by": json.dumps(
                                        [
metaflow/plugins/argo/argo_workflows_decorator.py
CHANGED

@@ -1,14 +1,12 @@
 import json
 import os
-import time


 from metaflow import current
 from metaflow.decorators import StepDecorator
 from metaflow.events import Trigger
 from metaflow.metadata_provider import MetaDatum
-from metaflow.
-from metaflow.graph import DAGNode, FlowGraph
+from metaflow.graph import FlowGraph
 from metaflow.flowspec import FlowSpec
 from .argo_events import ArgoEvent

@@ -42,7 +40,7 @@ class ArgoWorkflowsInternalDecorator(StepDecorator):
            if payload != "null":  # Argo-Workflow's None
                try:
                    payload = json.loads(payload)
-                except (TypeError, ValueError)
+                except (TypeError, ValueError):
                    # There could be arbitrary events that Metaflow doesn't know of
                    payload = {}
            triggers.append(
metaflow/plugins/aws/aws_utils.py
CHANGED

@@ -1,5 +1,4 @@
 import re
-import requests

 from metaflow.exception import MetaflowException
 from metaflow.metaflow_config import MAX_MEMORY_PER_TASK, MAX_CPU_PER_TASK
@@ -31,6 +30,10 @@ def get_ec2_instance_metadata():
        - ec2-region
        - ec2-availability-zone
    """
+
+    # TODO: Remove dependency on requests
+    import requests
+
    meta = {}
    # Capture AWS instance identity metadata. This is best-effort only since
    # access to this end-point might be blocked on AWS and not available
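Moving import requests into the function body defers the dependency to call time: importing the module no longer requires requests to be installed, which matters for the best-effort metadata probe described above. A sketch of the same lazy-import pattern (fetch_json is a hypothetical helper, not Metaflow code):

def fetch_json(url):
    # Lazy import: the dependency is only needed when metadata is actually fetched.
    import requests

    try:
        return requests.get(url, timeout=1).json()
    except requests.exceptions.RequestException:
        # Best-effort only: the endpoint may be blocked or unavailable.
        return {}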
@@ -160,6 +163,8 @@ def compute_resource_attributes(decos, compute_deco, step_name, resource_defaults):
                # Here we don't have ints, so we compare the value and raise
                # an exception if not equal
                if my_val != v:
+                    # TODO: Throw a better exception since the user has no
+                    # knowledge of 'compute' decorator
                    raise MetaflowException(
                        "'resources' and compute decorator have conflicting "
                        "values for '%s'. Please use consistent values or "
metaflow/plugins/aws/batch/batch_decorator.py
CHANGED

@@ -1,34 +1,30 @@
 import os
-import sys
 import platform
-import requests
+import sys
 import time

-from metaflow import util
 from metaflow import R, current
-
 from metaflow.decorators import StepDecorator
-from metaflow.plugins.resources_decorator import ResourcesDecorator
-from metaflow.plugins.timeout_decorator import get_run_time_limit_for_task
 from metaflow.metadata_provider import MetaDatum
 from metaflow.metadata_provider.util import sync_local_metadata_to_datastore
 from metaflow.metaflow_config import (
-    ECS_S3_ACCESS_IAM_ROLE,
-    BATCH_JOB_QUEUE,
     BATCH_CONTAINER_IMAGE,
     BATCH_CONTAINER_REGISTRY,
-    ECS_FARGATE_EXECUTION_ROLE,
+    BATCH_JOB_QUEUE,
     DATASTORE_LOCAL_DIR,
+    ECS_FARGATE_EXECUTION_ROLE,
+    ECS_S3_ACCESS_IAM_ROLE,
 )
+from metaflow.plugins.timeout_decorator import get_run_time_limit_for_task
 from metaflow.sidecar import Sidecar
 from metaflow.unbounded_foreach import UBF_CONTROL

-from .batch import BatchException
 from ..aws_utils import (
     compute_resource_attributes,
     get_docker_registry,
     get_ec2_instance_metadata,
 )
+from .batch import BatchException


 class BatchDecorator(StepDecorator):
@@ -280,6 +276,10 @@ class BatchDecorator(StepDecorator):
        # Metaflow would be running the container agent compatible with
        # version V4.
        # https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task-metadata-endpoint.html
+
+        # TODO: Remove dependency on requests
+        import requests
+
        try:
            logs_meta = (
                requests.get(url=os.environ["ECS_CONTAINER_METADATA_URI_V4"])
@@ -386,7 +386,7 @@ class BatchDecorator(StepDecorator):
                            len(flow._control_mapper_tasks),
                        )
                    )
-            except Exception
+            except Exception:
                pass
            raise Exception(
                "Batch secondary workers did not finish in %s seconds" % TIMEOUT
metaflow/plugins/aws/step_functions/step_functions.py
CHANGED

@@ -236,7 +236,7 @@ class StepFunctions(object):
            return parameters.get("metaflow.owner"), parameters.get(
                "metaflow.production_token"
            )
-        except KeyError
+        except KeyError:
            raise StepFunctionsException(
                "An existing non-metaflow "
                "workflow with the same name as "
metaflow/plugins/aws/step_functions/step_functions_cli.py
CHANGED

@@ -4,7 +4,6 @@ import re
 from hashlib import sha1

 from metaflow import JSONType, current, decorators, parameters
-from metaflow.client.core import get_metadata
 from metaflow._vendor import click
 from metaflow.exception import MetaflowException, MetaflowInternalError
 from metaflow.metaflow_config import (
|