metaflow 2.13__py2.py3-none-any.whl → 2.13.1__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metaflow/metadata_provider/heartbeat.py +23 -8
- metaflow/metaflow_config.py +2 -0
- metaflow/plugins/argo/argo_client.py +0 -2
- metaflow/plugins/argo/argo_workflows.py +86 -104
- metaflow/plugins/argo/argo_workflows_cli.py +0 -1
- metaflow/plugins/argo/argo_workflows_decorator.py +2 -4
- metaflow/plugins/argo/jobset_input_paths.py +0 -1
- metaflow/plugins/aws/aws_utils.py +6 -1
- metaflow/plugins/aws/batch/batch_client.py +1 -3
- metaflow/plugins/aws/batch/batch_decorator.py +11 -11
- metaflow/plugins/aws/step_functions/dynamo_db_client.py +0 -3
- metaflow/plugins/aws/step_functions/production_token.py +1 -1
- metaflow/plugins/aws/step_functions/step_functions.py +1 -1
- metaflow/plugins/aws/step_functions/step_functions_cli.py +0 -1
- metaflow/plugins/aws/step_functions/step_functions_decorator.py +0 -1
- metaflow/plugins/aws/step_functions/step_functions_deployer_objects.py +0 -1
- metaflow/plugins/kubernetes/kube_utils.py +55 -1
- metaflow/plugins/kubernetes/kubernetes.py +33 -80
- metaflow/plugins/kubernetes/kubernetes_cli.py +22 -5
- metaflow/plugins/kubernetes/kubernetes_decorator.py +49 -2
- metaflow/plugins/kubernetes/kubernetes_job.py +3 -6
- metaflow/plugins/kubernetes/kubernetes_jobsets.py +22 -5
- metaflow/plugins/pypi/bootstrap.py +87 -54
- metaflow/plugins/pypi/conda_environment.py +7 -6
- metaflow/plugins/pypi/micromamba.py +35 -21
- metaflow/plugins/pypi/pip.py +2 -4
- metaflow/plugins/pypi/utils.py +4 -2
- metaflow/version.py +1 -1
- {metaflow-2.13.dist-info → metaflow-2.13.1.dist-info}/METADATA +2 -2
- {metaflow-2.13.dist-info → metaflow-2.13.1.dist-info}/RECORD +34 -34
- {metaflow-2.13.dist-info → metaflow-2.13.1.dist-info}/WHEEL +1 -1
- {metaflow-2.13.dist-info → metaflow-2.13.1.dist-info}/LICENSE +0 -0
- {metaflow-2.13.dist-info → metaflow-2.13.1.dist-info}/entry_points.txt +0 -0
- {metaflow-2.13.dist-info → metaflow-2.13.1.dist-info}/top_level.txt +0 -0
| @@ -1,7 +1,14 @@ | |
| 1 | 
            -
             | 
| 1 | 
            +
            import re
         | 
| 2 | 
            +
            from typing import Dict, List, Optional
         | 
| 3 | 
            +
            from metaflow.exception import CommandException, MetaflowException
         | 
| 2 4 | 
             
            from metaflow.util import get_username, get_latest_run_id
         | 
| 3 5 |  | 
| 4 6 |  | 
| 7 | 
            +
            # avoid circular import by having the exception class contained here
         | 
| 8 | 
            +
            class KubernetesException(MetaflowException):
                """Exception raised for Kubernetes-related errors.

                Within this module it is raised when a label value fails validation
                or when a list of ``key=value`` items cannot be parsed.
                """

                # NOTE(review): presumably used by MetaflowException as the title
                # shown when the error is reported -- confirm in metaflow.exception.
                headline = "Kubernetes error"
         | 
| 10 | 
            +
             | 
| 11 | 
            +
             | 
| 5 12 | 
             
            def parse_cli_options(flow_name, run_id, user, my_runs, echo):
         | 
| 6 13 | 
             
                if user and my_runs:
         | 
| 7 14 | 
             
                    raise CommandException("--user and --my-runs are mutually exclusive.")
         | 
| @@ -52,3 +59,50 @@ def qos_requests_and_limits(qos: str, cpu: int, memory: int, storage: int): | |
| 52 59 | 
             
                # TODO: Add support for BestEffort once there is a use case for it.
         | 
| 53 60 | 
             
                # BestEffort - no limit or requests for cpu/memory
         | 
| 54 61 | 
             
                return qos_requests, qos_limits
         | 
| 62 | 
            +
             | 
| 63 | 
            +
             | 
| 64 | 
            +
            def validate_kube_labels(
                labels: Optional[Dict[str, Optional[str]]],
            ) -> bool:
                """Validate Kubernetes label values.

                Only the values are checked, not the keys. Ideally, keys should be
                static, and the validation rules for keys are more complex than those
                for values. For the full validation rules, see:

                https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#syntax-and-character-set

                Parameters
                ----------
                labels : Dict[str, Optional[str]], optional
                    Mapping of label key to label value. ``None`` or an empty mapping
                    is accepted, and so are ``None`` or empty-string values.

                Returns
                -------
                bool
                    True when every value is valid (or there is nothing to validate).

                Raises
                ------
                KubernetesException
                    On the first non-empty value that is longer than 63 characters,
                    does not begin and end with an alphanumeric character, or
                    contains a character other than alphanumerics, '-', '_' or '.'.
                """
                # No "^"/"$" anchors: the pattern is applied with re.fullmatch below.
                value_pattern = r"(([A-Za-z0-9][-A-Za-z0-9_.]{0,61})?[A-Za-z0-9])?"

                def validate_label(s: Optional[str]):
                    if not s:
                        # allow empty label
                        return True
                    # re.fullmatch instead of re.search with a trailing "$": "$" also
                    # matches just before a final newline, which would let a value
                    # such as "abc\n" slip through validation.
                    if not re.fullmatch(value_pattern, s):
                        raise KubernetesException(
                            'Invalid value: "%s"\n'
                            "A valid label must be an empty string or one that\n"
                            "  - Consist of alphanumeric, '-', '_' or '.' characters\n"
                            "  - Begins and ends with an alphanumeric character\n"
                            "  - Is at most 63 characters" % s
                        )
                    return True

                # validate_label either returns True or raises, so all() simply drives
                # the checks and stops at the first invalid value.
                return all(validate_label(v) for v in labels.values()) if labels else True
         | 
| 92 | 
            +
             | 
| 93 | 
            +
             | 
| 94 | 
            +
            def parse_kube_keyvalue_list(items: List[str], requires_both: bool = True):
                """Parse a sequence of ``key=value`` strings into a dictionary.

                Each item is split on the first ``=`` only, so a value may itself
                contain ``=``.

                Parameters
                ----------
                items : List[str]
                    Strings of the form ``key=value`` or, when `requires_both` is
                    False, optionally just ``key``.
                requires_both : bool, default True
                    If True, every item must contain ``=``. If False, an item
                    without ``=`` maps its key to None.

                Returns
                -------
                Dict[str, Optional[str]]
                    Mapping of each key to its value (None when no ``=`` was given).

                Raises
                ------
                KubernetesException
                    If a key appears more than once, if an item has no ``=`` while
                    `requires_both` is True, or if an item is not string-like.
                """

                def unparsable():
                    # Wrap `items` in a 1-tuple: with a bare `% items`, a tuple
                    # argument would be unpacked as format arguments and either
                    # raise TypeError or print only its first element.
                    return KubernetesException(
                        "Unable to parse kubernetes list: %s" % (items,)
                    )

                ret = {}
                for item_str in items:
                    try:
                        key, sep, value = item_str.partition("=")
                    except AttributeError:
                        # item is not a string (e.g. None or a number)
                        raise unparsable()
                    if requires_both and not sep:
                        raise unparsable()
                    if key in ret:
                        raise KubernetesException("Duplicate key found: %s" % key)
                    # An item without "=" has no value at all, which is recorded as
                    # None rather than as an empty string.
                    ret[key] = value if sep else None
                return ret
         | 
| @@ -1,11 +1,8 @@ | |
| 1 | 
            -
            import copy
         | 
| 2 1 | 
             
            import json
         | 
| 3 2 | 
             
            import math
         | 
| 4 3 | 
             
            import os
         | 
| 5 | 
            -
            import re
         | 
| 6 4 | 
             
            import shlex
         | 
| 7 5 | 
             
            import time
         | 
| 8 | 
            -
            from typing import Dict, List, Optional
         | 
| 9 6 | 
             
            from uuid import uuid4
         | 
| 10 7 |  | 
| 11 8 | 
             
            from metaflow import current, util
         | 
| @@ -35,7 +32,6 @@ from metaflow.metaflow_config import ( | |
| 35 32 | 
             
                DEFAULT_SECRETS_BACKEND_TYPE,
         | 
| 36 33 | 
             
                GCP_SECRET_MANAGER_PREFIX,
         | 
| 37 34 | 
             
                KUBERNETES_FETCH_EC2_METADATA,
         | 
| 38 | 
            -
                KUBERNETES_LABELS,
         | 
| 39 35 | 
             
                KUBERNETES_SANDBOX_INIT_SCRIPT,
         | 
| 40 36 | 
             
                OTEL_ENDPOINT,
         | 
| 41 37 | 
             
                S3_ENDPOINT_URL,
         | 
| @@ -193,6 +189,7 @@ class Kubernetes(object): | |
| 193 189 | 
             
                    persistent_volume_claims=None,
         | 
| 194 190 | 
             
                    tolerations=None,
         | 
| 195 191 | 
             
                    labels=None,
         | 
| 192 | 
            +
                    annotations=None,
         | 
| 196 193 | 
             
                    shared_memory=None,
         | 
| 197 194 | 
             
                    port=None,
         | 
| 198 195 | 
             
                    num_parallel=None,
         | 
| @@ -304,10 +301,6 @@ class Kubernetes(object): | |
| 304 301 | 
             
                        # see get_datastore_root_from_config in datastore/local.py).
         | 
| 305 302 | 
             
                    )
         | 
| 306 303 |  | 
| 307 | 
            -
                    _labels = self._get_labels(labels)
         | 
| 308 | 
            -
                    for k, v in _labels.items():
         | 
| 309 | 
            -
                        jobset.label(k, v)
         | 
| 310 | 
            -
             | 
| 311 304 | 
             
                    for k in list(
         | 
| 312 305 | 
             
                        [] if not secrets else [secrets] if isinstance(secrets, str) else secrets
         | 
| 313 306 | 
             
                    ) + KUBERNETES_SECRETS.split(","):
         | 
| @@ -379,13 +372,16 @@ class Kubernetes(object): | |
| 379 372 | 
             
                    for name, value in env.items():
         | 
| 380 373 | 
             
                        jobset.environment_variable(name, value)
         | 
| 381 374 |  | 
| 382 | 
            -
                     | 
| 375 | 
            +
                    system_annotations = {
         | 
| 383 376 | 
             
                        "metaflow/user": user,
         | 
| 384 377 | 
             
                        "metaflow/flow_name": flow_name,
         | 
| 385 378 | 
             
                        "metaflow/control-task-id": task_id,
         | 
| 379 | 
            +
                        "metaflow/run_id": run_id,
         | 
| 380 | 
            +
                        "metaflow/step_name": step_name,
         | 
| 381 | 
            +
                        "metaflow/attempt": attempt,
         | 
| 386 382 | 
             
                    }
         | 
| 387 383 | 
             
                    if current.get("project_name"):
         | 
| 388 | 
            -
                         | 
| 384 | 
            +
                        system_annotations.update(
         | 
| 389 385 | 
             
                            {
         | 
| 390 386 | 
             
                                "metaflow/project_name": current.project_name,
         | 
| 391 387 | 
             
                                "metaflow/branch_name": current.branch_name,
         | 
| @@ -393,15 +389,15 @@ class Kubernetes(object): | |
| 393 389 | 
             
                            }
         | 
| 394 390 | 
             
                        )
         | 
| 395 391 |  | 
| 396 | 
            -
                     | 
| 397 | 
            -
                         | 
| 392 | 
            +
                    system_labels = {
         | 
| 393 | 
            +
                        "app.kubernetes.io/name": "metaflow-task",
         | 
| 394 | 
            +
                        "app.kubernetes.io/part-of": "metaflow",
         | 
| 395 | 
            +
                    }
         | 
| 398 396 |  | 
| 399 | 
            -
                    (
         | 
| 400 | 
            -
             | 
| 401 | 
            -
             | 
| 402 | 
            -
                         | 
| 403 | 
            -
                        .label("app.kubernetes.io/name", "metaflow-task")
         | 
| 404 | 
            -
                        .label("app.kubernetes.io/part-of", "metaflow")
         | 
| 397 | 
            +
                    jobset.labels({**({} if not labels else labels), **system_labels})
         | 
| 398 | 
            +
             | 
| 399 | 
            +
                    jobset.annotations(
         | 
| 400 | 
            +
                        {**({} if not annotations else annotations), **system_annotations}
         | 
| 405 401 | 
             
                    )
         | 
| 406 402 | 
             
                    # We need this task-id set so that all the nodes are aware of the control
         | 
| 407 403 | 
             
                    # task's task-id. These "MF_" variables populate the `current.parallel` namedtuple
         | 
| @@ -491,6 +487,7 @@ class Kubernetes(object): | |
| 491 487 | 
             
                    port=None,
         | 
| 492 488 | 
             
                    name_pattern=None,
         | 
| 493 489 | 
             
                    qos=None,
         | 
| 490 | 
            +
                    annotations=None,
         | 
| 494 491 | 
             
                ):
         | 
| 495 492 | 
             
                    if env is None:
         | 
| 496 493 | 
             
                        env = {}
         | 
| @@ -523,7 +520,8 @@ class Kubernetes(object): | |
| 523 520 | 
             
                            retries=0,
         | 
| 524 521 | 
             
                            step_name=step_name,
         | 
| 525 522 | 
             
                            tolerations=tolerations,
         | 
| 526 | 
            -
                            labels= | 
| 523 | 
            +
                            labels=labels,
         | 
| 524 | 
            +
                            annotations=annotations,
         | 
| 527 525 | 
             
                            use_tmpfs=use_tmpfs,
         | 
| 528 526 | 
             
                            tmpfs_tempdir=tmpfs_tempdir,
         | 
| 529 527 | 
             
                            tmpfs_size=tmpfs_size,
         | 
| @@ -642,13 +640,25 @@ class Kubernetes(object): | |
| 642 640 |  | 
| 643 641 | 
             
                    for name, value in env.items():
         | 
| 644 642 | 
             
                        job.environment_variable(name, value)
         | 
| 643 | 
            +
                    # Add job specific labels
         | 
| 644 | 
            +
                    system_labels = {
         | 
| 645 | 
            +
                        "app.kubernetes.io/name": "metaflow-task",
         | 
| 646 | 
            +
                        "app.kubernetes.io/part-of": "metaflow",
         | 
| 647 | 
            +
                    }
         | 
| 648 | 
            +
                    for name, value in system_labels.items():
         | 
| 649 | 
            +
                        job.label(name, value)
         | 
| 645 650 |  | 
| 646 | 
            -
                    annotations  | 
| 647 | 
            -
             | 
| 651 | 
            +
                    # Add job specific annotations not set in the decorator.
         | 
| 652 | 
            +
                    system_annotations = {
         | 
| 648 653 | 
             
                        "metaflow/flow_name": flow_name,
         | 
| 654 | 
            +
                        "metaflow/run_id": run_id,
         | 
| 655 | 
            +
                        "metaflow/step_name": step_name,
         | 
| 656 | 
            +
                        "metaflow/task_id": task_id,
         | 
| 657 | 
            +
                        "metaflow/attempt": attempt,
         | 
| 658 | 
            +
                        "metaflow/user": user,
         | 
| 649 659 | 
             
                    }
         | 
| 650 660 | 
             
                    if current.get("project_name"):
         | 
| 651 | 
            -
                         | 
| 661 | 
            +
                        system_annotations.update(
         | 
| 652 662 | 
             
                            {
         | 
| 653 663 | 
             
                                "metaflow/project_name": current.project_name,
         | 
| 654 664 | 
             
                                "metaflow/branch_name": current.branch_name,
         | 
| @@ -656,7 +666,7 @@ class Kubernetes(object): | |
| 656 666 | 
             
                            }
         | 
| 657 667 | 
             
                        )
         | 
| 658 668 |  | 
| 659 | 
            -
                    for name, value in  | 
| 669 | 
            +
                    for name, value in system_annotations.items():
         | 
| 660 670 | 
             
                        job.annotation(name, value)
         | 
| 661 671 |  | 
| 662 672 | 
             
                    (
         | 
| @@ -775,60 +785,3 @@ class Kubernetes(object): | |
| 775 785 | 
             
                        "stderr",
         | 
| 776 786 | 
             
                        job_id=self._job.id,
         | 
| 777 787 | 
             
                    )
         | 
| 778 | 
            -
             | 
| 779 | 
            -
                @staticmethod
         | 
| 780 | 
            -
                def _get_labels(extra_labels=None):
         | 
| 781 | 
            -
                    if extra_labels is None:
         | 
| 782 | 
            -
                        extra_labels = {}
         | 
| 783 | 
            -
                    env_labels = KUBERNETES_LABELS.split(",") if KUBERNETES_LABELS else []
         | 
| 784 | 
            -
                    env_labels = parse_kube_keyvalue_list(env_labels, False)
         | 
| 785 | 
            -
                    labels = {**env_labels, **extra_labels}
         | 
| 786 | 
            -
                    validate_kube_labels(labels)
         | 
| 787 | 
            -
                    return labels
         | 
| 788 | 
            -
             | 
| 789 | 
            -
             | 
| 790 | 
            -
            def validate_kube_labels(
         | 
| 791 | 
            -
                labels: Optional[Dict[str, Optional[str]]],
         | 
| 792 | 
            -
            ) -> bool:
         | 
| 793 | 
            -
                """Validate label values.
         | 
| 794 | 
            -
             | 
| 795 | 
            -
                This validates the kubernetes label values.  It does not validate the keys.
         | 
| 796 | 
            -
                Ideally, keys should be static and also the validation rules for keys are
         | 
| 797 | 
            -
                more complex than those for values.  For full validation rules, see:
         | 
| 798 | 
            -
             | 
| 799 | 
            -
                https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#syntax-and-character-set
         | 
| 800 | 
            -
                """
         | 
| 801 | 
            -
             | 
| 802 | 
            -
                def validate_label(s: Optional[str]):
         | 
| 803 | 
            -
                    regex_match = r"^(([A-Za-z0-9][-A-Za-z0-9_.]{0,61})?[A-Za-z0-9])?$"
         | 
| 804 | 
            -
                    if not s:
         | 
| 805 | 
            -
                        # allow empty label
         | 
| 806 | 
            -
                        return True
         | 
| 807 | 
            -
                    if not re.search(regex_match, s):
         | 
| 808 | 
            -
                        raise KubernetesException(
         | 
| 809 | 
            -
                            'Invalid value: "%s"\n'
         | 
| 810 | 
            -
                            "A valid label must be an empty string or one that\n"
         | 
| 811 | 
            -
                            "  - Consist of alphanumeric, '-', '_' or '.' characters\n"
         | 
| 812 | 
            -
                            "  - Begins and ends with an alphanumeric character\n"
         | 
| 813 | 
            -
                            "  - Is at most 63 characters" % s
         | 
| 814 | 
            -
                        )
         | 
| 815 | 
            -
                    return True
         | 
| 816 | 
            -
             | 
| 817 | 
            -
                return all([validate_label(v) for v in labels.values()]) if labels else True
         | 
| 818 | 
            -
             | 
| 819 | 
            -
             | 
| 820 | 
            -
            def parse_kube_keyvalue_list(items: List[str], requires_both: bool = True):
         | 
| 821 | 
            -
                try:
         | 
| 822 | 
            -
                    ret = {}
         | 
| 823 | 
            -
                    for item_str in items:
         | 
| 824 | 
            -
                        item = item_str.split("=", 1)
         | 
| 825 | 
            -
                        if requires_both:
         | 
| 826 | 
            -
                            item[1]  # raise IndexError
         | 
| 827 | 
            -
                        if str(item[0]) in ret:
         | 
| 828 | 
            -
                            raise KubernetesException("Duplicate key found: %s" % str(item[0]))
         | 
| 829 | 
            -
                        ret[str(item[0])] = str(item[1]) if len(item) > 1 else None
         | 
| 830 | 
            -
                    return ret
         | 
| 831 | 
            -
                except KubernetesException as e:
         | 
| 832 | 
            -
                    raise e
         | 
| 833 | 
            -
                except (AttributeError, IndexError):
         | 
| 834 | 
            -
                    raise KubernetesException("Unable to parse kubernetes list: %s" % items)
         | 
| @@ -3,14 +3,17 @@ import sys | |
| 3 3 | 
             
            import time
         | 
| 4 4 | 
             
            import traceback
         | 
| 5 5 |  | 
| 6 | 
            -
            from metaflow.plugins.kubernetes.kube_utils import  | 
| 6 | 
            +
            from metaflow.plugins.kubernetes.kube_utils import (
         | 
| 7 | 
            +
                parse_cli_options,
         | 
| 8 | 
            +
                parse_kube_keyvalue_list,
         | 
| 9 | 
            +
            )
         | 
| 7 10 | 
             
            from metaflow.plugins.kubernetes.kubernetes_client import KubernetesClient
         | 
| 8 11 | 
             
            import metaflow.tracing as tracing
         | 
| 9 12 | 
             
            from metaflow import JSONTypeClass, util
         | 
| 10 13 | 
             
            from metaflow._vendor import click
         | 
| 11 14 | 
             
            from metaflow.exception import METAFLOW_EXIT_DISALLOW_RETRY, MetaflowException
         | 
| 12 15 | 
             
            from metaflow.metadata_provider.util import sync_local_metadata_from_datastore
         | 
| 13 | 
            -
            from metaflow.metaflow_config import DATASTORE_LOCAL_DIR | 
| 16 | 
            +
            from metaflow.metaflow_config import DATASTORE_LOCAL_DIR
         | 
| 14 17 | 
             
            from metaflow.mflog import TASK_LOG_SOURCE
         | 
| 15 18 | 
             
            from metaflow.unbounded_foreach import UBF_CONTROL, UBF_TASK
         | 
| 16 19 |  | 
| @@ -18,9 +21,7 @@ from .kubernetes import ( | |
| 18 21 | 
             
                Kubernetes,
         | 
| 19 22 | 
             
                KubernetesException,
         | 
| 20 23 | 
             
                KubernetesKilledException,
         | 
| 21 | 
            -
                parse_kube_keyvalue_list,
         | 
| 22 24 | 
             
            )
         | 
| 23 | 
            -
            from .kubernetes_decorator import KubernetesDecorator
         | 
| 24 25 |  | 
| 25 26 |  | 
| 26 27 | 
             
            @click.group()
         | 
| @@ -132,6 +133,18 @@ def kubernetes(): | |
| 132 133 | 
             
                type=str,
         | 
| 133 134 | 
             
                help="Quality of Service class for the Kubernetes pod",
         | 
| 134 135 | 
             
            )
         | 
| 136 | 
            +
            @click.option(
         | 
| 137 | 
            +
                "--labels",
         | 
| 138 | 
            +
                default=None,
         | 
| 139 | 
            +
                type=JSONTypeClass(),
         | 
| 140 | 
            +
                multiple=False,
         | 
| 141 | 
            +
            )
         | 
| 142 | 
            +
            @click.option(
         | 
| 143 | 
            +
                "--annotations",
         | 
| 144 | 
            +
                default=None,
         | 
| 145 | 
            +
                type=JSONTypeClass(),
         | 
| 146 | 
            +
                multiple=False,
         | 
| 147 | 
            +
            )
         | 
| 135 148 | 
             
            @click.pass_context
         | 
| 136 149 | 
             
            def step(
         | 
| 137 150 | 
             
                ctx,
         | 
| @@ -161,6 +174,8 @@ def step( | |
| 161 174 | 
             
                port=None,
         | 
| 162 175 | 
             
                num_parallel=None,
         | 
| 163 176 | 
             
                qos=None,
         | 
| 177 | 
            +
                labels=None,
         | 
| 178 | 
            +
                annotations=None,
         | 
| 164 179 | 
             
                **kwargs
         | 
| 165 180 | 
             
            ):
         | 
| 166 181 | 
             
                def echo(msg, stream="stderr", job_id=None, **kwargs):
         | 
| @@ -302,8 +317,10 @@ def step( | |
| 302 317 | 
             
                            port=port,
         | 
| 303 318 | 
             
                            num_parallel=num_parallel,
         | 
| 304 319 | 
             
                            qos=qos,
         | 
| 320 | 
            +
                            labels=labels,
         | 
| 321 | 
            +
                            annotations=annotations,
         | 
| 305 322 | 
             
                        )
         | 
| 306 | 
            -
                except Exception | 
| 323 | 
            +
                except Exception:
         | 
| 307 324 | 
             
                    traceback.print_exc(chain=False)
         | 
| 308 325 | 
             
                    _sync_metadata()
         | 
| 309 326 | 
             
                    sys.exit(METAFLOW_EXIT_DISALLOW_RETRY)
         | 
| @@ -19,6 +19,8 @@ from metaflow.metaflow_config import ( | |
| 19 19 | 
             
                KUBERNETES_GPU_VENDOR,
         | 
| 20 20 | 
             
                KUBERNETES_IMAGE_PULL_POLICY,
         | 
| 21 21 | 
             
                KUBERNETES_MEMORY,
         | 
| 22 | 
            +
                KUBERNETES_LABELS,
         | 
| 23 | 
            +
                KUBERNETES_ANNOTATIONS,
         | 
| 22 24 | 
             
                KUBERNETES_NAMESPACE,
         | 
| 23 25 | 
             
                KUBERNETES_NODE_SELECTOR,
         | 
| 24 26 | 
             
                KUBERNETES_PERSISTENT_VOLUME_CLAIMS,
         | 
| @@ -34,7 +36,8 @@ from metaflow.sidecar import Sidecar | |
| 34 36 | 
             
            from metaflow.unbounded_foreach import UBF_CONTROL
         | 
| 35 37 |  | 
| 36 38 | 
             
            from ..aws.aws_utils import get_docker_registry, get_ec2_instance_metadata
         | 
| 37 | 
            -
            from .kubernetes import KubernetesException | 
| 39 | 
            +
            from .kubernetes import KubernetesException
         | 
| 40 | 
            +
            from .kube_utils import validate_kube_labels, parse_kube_keyvalue_list
         | 
| 38 41 |  | 
| 39 42 | 
             
            try:
         | 
| 40 43 | 
             
                unicode
         | 
| @@ -89,6 +92,10 @@ class KubernetesDecorator(StepDecorator): | |
| 89 92 | 
             
                tolerations : List[str], default []
         | 
| 90 93 | 
             
                    The default is extracted from METAFLOW_KUBERNETES_TOLERATIONS.
         | 
| 91 94 | 
             
                    Kubernetes tolerations to use when launching pod in Kubernetes.
         | 
| 95 | 
            +
                labels: Dict[str, str], default: METAFLOW_KUBERNETES_LABELS
         | 
| 96 | 
            +
                    Kubernetes labels to use when launching pod in Kubernetes.
         | 
| 97 | 
            +
                annotations: Dict[str, str], default: METAFLOW_KUBERNETES_ANNOTATIONS
         | 
| 98 | 
            +
                    Kubernetes annotations to use when launching pod in Kubernetes.
         | 
| 92 99 | 
             
                use_tmpfs : bool, default False
         | 
| 93 100 | 
             
                    This enables an explicit tmpfs mount for this step.
         | 
| 94 101 | 
             
                tmpfs_tempdir : bool, default True
         | 
| @@ -131,6 +138,8 @@ class KubernetesDecorator(StepDecorator): | |
| 131 138 | 
             
                    "gpu_vendor": None,
         | 
| 132 139 | 
             
                    "tolerations": None,  # e.g., [{"key": "arch", "operator": "Equal", "value": "amd"},
         | 
| 133 140 | 
             
                    #                              {"key": "foo", "operator": "Equal", "value": "bar"}]
         | 
| 141 | 
            +
                    "labels": None,  # e.g. {"test-label": "value", "another-label":"value2"}
         | 
| 142 | 
            +
                    "annotations": None,  # e.g. {"note": "value", "another-note": "value2"}
         | 
| 134 143 | 
             
                    "use_tmpfs": None,
         | 
| 135 144 | 
             
                    "tmpfs_tempdir": True,
         | 
| 136 145 | 
             
                    "tmpfs_size": None,
         | 
| @@ -217,6 +226,36 @@ class KubernetesDecorator(StepDecorator): | |
| 217 226 | 
             
                        self.attributes["memory"] = KUBERNETES_MEMORY
         | 
| 218 227 | 
             
                    if self.attributes["disk"] == self.defaults["disk"] and KUBERNETES_DISK:
         | 
| 219 228 | 
             
                        self.attributes["disk"] = KUBERNETES_DISK
         | 
| 229 | 
            +
                    # Label source precedence (decreasing):
         | 
| 230 | 
            +
                    # - System labels (set outside of decorator)
         | 
| 231 | 
            +
                    # - Decorator labels: @kubernetes(labels={})
         | 
| 232 | 
            +
                    # - Environment variable labels: METAFLOW_KUBERNETES_LABELS=
         | 
| 233 | 
            +
                    deco_labels = {}
         | 
| 234 | 
            +
                    if self.attributes["labels"] is not None:
         | 
| 235 | 
            +
                        deco_labels = self.attributes["labels"]
         | 
| 236 | 
            +
             | 
| 237 | 
            +
                    env_labels = {}
         | 
| 238 | 
            +
                    if KUBERNETES_LABELS:
         | 
| 239 | 
            +
                        env_labels = parse_kube_keyvalue_list(KUBERNETES_LABELS.split(","), False)
         | 
| 240 | 
            +
             | 
| 241 | 
            +
                    self.attributes["labels"] = {**env_labels, **deco_labels}
         | 
| 242 | 
            +
             | 
| 243 | 
            +
                    # Annotations
         | 
| 244 | 
            +
                    # annotation precedence (decreasing):
         | 
| 245 | 
            +
                    # - System annotations (set outside of decorator)
         | 
| 246 | 
            +
                    # - Decorator annotations: @kubernetes(annotations={})
         | 
| 247 | 
            +
                    # - Environment annotations: METAFLOW_KUBERNETES_ANNOTATIONS=
         | 
| 248 | 
            +
                    deco_annotations = {}
         | 
| 249 | 
            +
                    if self.attributes["annotations"] is not None:
         | 
| 250 | 
            +
                        deco_annotations = self.attributes["annotations"]
         | 
| 251 | 
            +
             | 
| 252 | 
            +
                    env_annotations = {}
         | 
| 253 | 
            +
                    if KUBERNETES_ANNOTATIONS:
         | 
| 254 | 
            +
                        env_annotations = parse_kube_keyvalue_list(
         | 
| 255 | 
            +
                            KUBERNETES_ANNOTATIONS.split(","), False
         | 
| 256 | 
            +
                        )
         | 
| 257 | 
            +
             | 
| 258 | 
            +
                    self.attributes["annotations"] = {**env_annotations, **deco_annotations}
         | 
| 220 259 |  | 
| 221 260 | 
             
                    # If no docker image is explicitly specified, impute a default image.
         | 
| 222 261 | 
             
                    if not self.attributes["image"]:
         | 
| @@ -371,6 +410,9 @@ class KubernetesDecorator(StepDecorator): | |
| 371 410 | 
             
                                )
         | 
| 372 411 | 
             
                            )
         | 
| 373 412 |  | 
| 413 | 
            +
                    validate_kube_labels(self.attributes["labels"])
         | 
| 414 | 
            +
                    # TODO: add validation to annotations as well?
         | 
| 415 | 
            +
             | 
| 374 416 | 
             
                def package_init(self, flow, step_name, environment):
         | 
| 375 417 | 
             
                    try:
         | 
| 376 418 | 
             
                        # Kubernetes is a soft dependency.
         | 
| @@ -426,7 +468,12 @@ class KubernetesDecorator(StepDecorator): | |
| 426 468 | 
             
                                    "=".join([key, str(val)]) if val else key
         | 
| 427 469 | 
             
                                    for key, val in v.items()
         | 
| 428 470 | 
             
                                ]
         | 
| 429 | 
            -
                            elif k in ["tolerations", "persistent_volume_claims"]:
         | 
| 471 | 
            +
                            elif k in [
         | 
| 472 | 
            +
                                "tolerations",
         | 
| 473 | 
            +
                                "persistent_volume_claims",
         | 
| 474 | 
            +
                                "labels",
         | 
| 475 | 
            +
                                "annotations",
         | 
| 476 | 
            +
                            ]:
         | 
| 430 477 | 
             
                                cli_args.command_options[k] = json.dumps(v)
         | 
| 431 478 | 
             
                            else:
         | 
| 432 479 | 
             
                                cli_args.command_options[k] = v
         | 
| @@ -1,22 +1,19 @@ | |
| 1 | 
            -
            import copy
         | 
| 2 1 | 
             
            import json
         | 
| 3 2 | 
             
            import math
         | 
| 4 3 | 
             
            import random
         | 
| 5 | 
            -
            import sys
         | 
| 6 4 | 
             
            import time
         | 
| 7 5 |  | 
| 8 6 | 
             
            from metaflow.exception import MetaflowException
         | 
| 9 7 | 
             
            from metaflow.metaflow_config import KUBERNETES_SECRETS
         | 
| 10 8 | 
             
            from metaflow.tracing import inject_tracing_vars
         | 
| 11 | 
            -
            from metaflow.unbounded_foreach import UBF_CONTROL, UBF_TASK
         | 
| 12 9 |  | 
| 13 10 | 
             
            CLIENT_REFRESH_INTERVAL_SECONDS = 300
         | 
| 11 | 
            +
             | 
| 12 | 
            +
            from .kube_utils import qos_requests_and_limits
         | 
| 14 13 | 
             
            from .kubernetes_jobsets import (
         | 
| 15 14 | 
             
                KubernetesJobSet,
         | 
| 16 15 | 
             
            )  # We need this import for Kubernetes Client.
         | 
| 17 16 |  | 
| 18 | 
            -
            from .kube_utils import qos_requests_and_limits
         | 
| 19 | 
            -
             | 
| 20 17 |  | 
| 21 18 | 
             
            class KubernetesJobException(MetaflowException):
         | 
| 22 19 | 
             
                headline = "Kubernetes job error"
         | 
| @@ -430,7 +427,7 @@ class RunningJob(object): | |
| 430 427 | 
             
                    def best_effort_kill():
         | 
| 431 428 | 
             
                        try:
         | 
| 432 429 | 
             
                            self.kill()
         | 
| 433 | 
            -
                        except Exception | 
| 430 | 
            +
                        except Exception:
         | 
| 434 431 | 
             
                            pass
         | 
| 435 432 |  | 
| 436 433 | 
             
                    atexit.register(best_effort_kill)
         | 
| @@ -1,4 +1,3 @@ | |
| 1 | 
            -
            import copy
         | 
| 2 1 | 
             
            import json
         | 
| 3 2 | 
             
            import math
         | 
| 4 3 | 
             
            import random
         | 
| @@ -7,7 +6,6 @@ from collections import namedtuple | |
| 7 6 | 
             
            from metaflow.exception import MetaflowException
         | 
| 8 7 | 
             
            from metaflow.metaflow_config import KUBERNETES_JOBSET_GROUP, KUBERNETES_JOBSET_VERSION
         | 
| 9 8 | 
             
            from metaflow.tracing import inject_tracing_vars
         | 
| 10 | 
            -
            from metaflow.metaflow_config import KUBERNETES_SECRETS
         | 
| 11 9 |  | 
| 12 10 | 
             
            from .kube_utils import qos_requests_and_limits
         | 
| 13 11 |  | 
| @@ -257,7 +255,7 @@ class RunningJobSet(object): | |
| 257 255 | 
             
                    def best_effort_kill():
         | 
| 258 256 | 
             
                        try:
         | 
| 259 257 | 
             
                            self.kill()
         | 
| 260 | 
            -
                        except Exception | 
| 258 | 
            +
                        except Exception:
         | 
| 261 259 | 
             
                            pass
         | 
| 262 260 |  | 
| 263 261 | 
             
                    atexit.register(best_effort_kill)
         | 
| @@ -342,7 +340,7 @@ class RunningJobSet(object): | |
| 342 340 | 
             
                            stdout=True,
         | 
| 343 341 | 
             
                            tty=False,
         | 
| 344 342 | 
             
                        )
         | 
| 345 | 
            -
                    except Exception | 
| 343 | 
            +
                    except Exception:
         | 
| 346 344 | 
             
                        with client.ApiClient() as api_client:
         | 
| 347 345 | 
             
                            # If we are unable to kill the control pod then
         | 
| 348 346 | 
             
                            # Delete the jobset to kill the subsequent pods.
         | 
| @@ -862,6 +860,16 @@ class KubernetesJobSet(object): | |
| 862 860 | 
             
                    self._annotations = dict(self._annotations, **{name: value})
         | 
| 863 861 | 
             
                    return self
         | 
| 864 862 |  | 
| 863 | 
            +
                def labels(self, labels):
         | 
| 864 | 
            +
                    for k, v in labels.items():
         | 
| 865 | 
            +
                        self.label(k, v)
         | 
| 866 | 
            +
                    return self
         | 
| 867 | 
            +
             | 
| 868 | 
            +
                def annotations(self, annotations):
         | 
| 869 | 
            +
                    for k, v in annotations.items():
         | 
| 870 | 
            +
                        self.annotation(k, v)
         | 
| 871 | 
            +
                    return self
         | 
| 872 | 
            +
             | 
| 865 873 | 
             
                def secret(self, name):
         | 
| 866 874 | 
             
                    self.worker.secret(name)
         | 
| 867 875 | 
             
                    self.control.secret(name)
         | 
| @@ -987,15 +995,24 @@ class KubernetesArgoJobSet(object): | |
| 987 995 | 
             
                    self._labels = dict(self._labels, **{name: value})
         | 
| 988 996 | 
             
                    return self
         | 
| 989 997 |  | 
| 998 | 
            +
                def labels(self, labels):
         | 
| 999 | 
            +
                    for k, v in labels.items():
         | 
| 1000 | 
            +
                        self.label(k, v)
         | 
| 1001 | 
            +
                    return self
         | 
| 1002 | 
            +
             | 
| 990 1003 | 
             
                def annotation(self, name, value):
         | 
| 991 1004 | 
             
                    self.worker.annotation(name, value)
         | 
| 992 1005 | 
             
                    self.control.annotation(name, value)
         | 
| 993 1006 | 
             
                    self._annotations = dict(self._annotations, **{name: value})
         | 
| 994 1007 | 
             
                    return self
         | 
| 995 1008 |  | 
| 1009 | 
            +
                def annotations(self, annotations):
         | 
| 1010 | 
            +
                    for k, v in annotations.items():
         | 
| 1011 | 
            +
                        self.annotation(k, v)
         | 
| 1012 | 
            +
                    return self
         | 
| 1013 | 
            +
             | 
| 996 1014 | 
             
                def dump(self):
         | 
| 997 1015 | 
             
                    client = self._kubernetes_sdk
         | 
| 998 | 
            -
                    import json
         | 
| 999 1016 |  | 
| 1000 1017 | 
             
                    data = json.dumps(
         | 
| 1001 1018 | 
             
                        client.ApiClient().sanitize_for_serialization(
         |