metaflow 2.11.1__py2.py3-none-any.whl → 2.11.3__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. metaflow/flowspec.py +7 -3
  2. metaflow/metaflow_config.py +11 -1
  3. metaflow/parameters.py +6 -0
  4. metaflow/plugins/argo/argo_workflows.py +101 -23
  5. metaflow/plugins/aws/batch/batch.py +2 -0
  6. metaflow/plugins/aws/batch/batch_client.py +10 -2
  7. metaflow/plugins/aws/step_functions/dynamo_db_client.py +28 -6
  8. metaflow/plugins/aws/step_functions/production_token.py +1 -1
  9. metaflow/plugins/aws/step_functions/step_functions.py +219 -4
  10. metaflow/plugins/aws/step_functions/step_functions_cli.py +104 -6
  11. metaflow/plugins/aws/step_functions/step_functions_client.py +8 -3
  12. metaflow/plugins/aws/step_functions/step_functions_decorator.py +1 -1
  13. metaflow/plugins/cards/card_cli.py +2 -2
  14. metaflow/plugins/kubernetes/kubernetes.py +2 -0
  15. metaflow/plugins/kubernetes/kubernetes_cli.py +3 -0
  16. metaflow/plugins/kubernetes/kubernetes_client.py +10 -2
  17. metaflow/plugins/kubernetes/kubernetes_decorator.py +17 -0
  18. metaflow/plugins/kubernetes/kubernetes_job.py +27 -0
  19. metaflow/plugins/pypi/bootstrap.py +1 -1
  20. metaflow/plugins/pypi/conda_decorator.py +21 -1
  21. metaflow/plugins/pypi/conda_environment.py +21 -4
  22. metaflow/version.py +1 -1
  23. {metaflow-2.11.1.dist-info → metaflow-2.11.3.dist-info}/METADATA +2 -2
  24. {metaflow-2.11.1.dist-info → metaflow-2.11.3.dist-info}/RECORD +28 -28
  25. {metaflow-2.11.1.dist-info → metaflow-2.11.3.dist-info}/LICENSE +0 -0
  26. {metaflow-2.11.1.dist-info → metaflow-2.11.3.dist-info}/WHEEL +0 -0
  27. {metaflow-2.11.1.dist-info → metaflow-2.11.3.dist-info}/entry_points.txt +0 -0
  28. {metaflow-2.11.1.dist-info → metaflow-2.11.3.dist-info}/top_level.txt +0 -0
metaflow/flowspec.py CHANGED
@@ -17,7 +17,7 @@ from .exception import (
  )
  from .graph import FlowGraph
  from .unbounded_foreach import UnboundedForeachInput
- from .metaflow_config import INCLUDE_FOREACH_STACK
+ from .metaflow_config import INCLUDE_FOREACH_STACK, MAXIMUM_FOREACH_VALUE_CHARS

  # For Python 3 compatibility
  try:
@@ -28,6 +28,8 @@ except NameError:

  from .datastore.inputs import Inputs

+ INTERNAL_ARTIFACTS_SET = set(["_foreach_values"])
+

  class InvalidNextException(MetaflowException):
      headline = "Invalid self.next() transition detected"
@@ -446,7 +448,9 @@ class FlowSpec(object):
  available_vars = (
      (var, sha)
      for var, sha in inp._datastore.items()
-     if (var not in exclude) and (not hasattr(self, var))
+     if (var not in exclude)
+     and (not hasattr(self, var))
+     and (var not in INTERNAL_ARTIFACTS_SET)
  )
  for var, sha in available_vars:
      _, previous_sha = to_merge.setdefault(var, (inp, sha))
@@ -529,7 +533,7 @@ class FlowSpec(object):
  )

  value = item if _is_primitive_type(item) else reprlib.Repr().repr(item)
- return basestring(value)
+ return basestring(value)[:MAXIMUM_FOREACH_VALUE_CHARS]

  def next(self, *dsts: Callable[..., None], **kwargs) -> None:
      """
metaflow/metaflow_config.py CHANGED
@@ -205,6 +205,8 @@ DEFAULT_CONTAINER_REGISTRY = from_conf("DEFAULT_CONTAINER_REGISTRY")
  # Controls whether to include foreach stack information in metadata.
  # TODO(Darin, 05/01/24): Remove this flag once we are confident with this feature.
  INCLUDE_FOREACH_STACK = from_conf("INCLUDE_FOREACH_STACK", False)
+ # Maximum length of the foreach value string to be stored in each ForeachFrame.
+ MAXIMUM_FOREACH_VALUE_CHARS = from_conf("MAXIMUM_FOREACH_VALUE_CHARS", 30)

  ###
  # Organization customizations
@@ -268,7 +270,13 @@ SFN_STATE_MACHINE_PREFIX = from_conf("SFN_STATE_MACHINE_PREFIX")
  # machine execution logs. This needs to be available when using the
  # `step-functions create --log-execution-history` command.
  SFN_EXECUTION_LOG_GROUP_ARN = from_conf("SFN_EXECUTION_LOG_GROUP_ARN")
-
+ # Amazon S3 path for storing the results of AWS Step Functions Distributed Map
+ SFN_S3_DISTRIBUTED_MAP_OUTPUT_PATH = from_conf(
+     "SFN_S3_DISTRIBUTED_MAP_OUTPUT_PATH",
+     os.path.join(DATASTORE_SYSROOT_S3, "sfn_distributed_map_output")
+     if DATASTORE_SYSROOT_S3
+     else None,
+ )
  ###
  # Kubernetes configuration
  ###
@@ -299,6 +307,8 @@ KUBERNETES_CONTAINER_REGISTRY = from_conf(
  )
  # Toggle for trying to fetch EC2 instance metadata
  KUBERNETES_FETCH_EC2_METADATA = from_conf("KUBERNETES_FETCH_EC2_METADATA", False)
+ # Shared memory in MB to use for this step
+ KUBERNETES_SHARED_MEMORY = from_conf("KUBERNETES_SHARED_MEMORY", None)

  ARGO_WORKFLOWS_KUBERNETES_SECRETS = from_conf("ARGO_WORKFLOWS_KUBERNETES_SECRETS", "")
  ARGO_WORKFLOWS_ENV_VARS_TO_SKIP = from_conf("ARGO_WORKFLOWS_ENV_VARS_TO_SKIP", "")
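
All three new settings are plain from_conf options, so they should be configurable like any other Metaflow knob; the METAFLOW_-prefixed environment variable names below are inferred from the configuration names above rather than taken from this diff:

    import os

    # Illustrative overrides; set these before metaflow is imported, since
    # metaflow_config.py is evaluated at import time.
    os.environ["METAFLOW_MAXIMUM_FOREACH_VALUE_CHARS"] = "60"  # default: 30
    os.environ["METAFLOW_KUBERNETES_SHARED_MEMORY"] = "512"  # in MB; default: None
    os.environ["METAFLOW_SFN_S3_DISTRIBUTED_MAP_OUTPUT_PATH"] = (
        "s3://example-bucket/metaflow/sfn_distributed_map_output"  # hypothetical bucket
    )

    import metaflow  # configuration above is picked up here
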
metaflow/parameters.py CHANGED
@@ -331,6 +331,12 @@ class Parameter(object):
          "for string parameters." % name
      )

+ def __repr__(self):
+     return "metaflow.Parameter(name=%s, kwargs=%s)" % (name, kwargs)
+
+ def __str__(self):
+     return "metaflow.Parameter(name=%s, kwargs=%s)" % (name, kwargs)
+
  def option_kwargs(self, deploy_mode):
      kwargs = self.kwargs
      if isinstance(kwargs.get("default"), DeployTimeField) and not deploy_mode:
metaflow/plugins/argo/argo_workflows.py CHANGED
@@ -227,8 +227,8 @@ class ArgoWorkflows(object):

      return schedule_deleted, sensor_deleted, workflow_deleted

- @staticmethod
- def terminate(flow_name, name):
+ @classmethod
+ def terminate(cls, flow_name, name):
      client = ArgoClient(namespace=KUBERNETES_NAMESPACE)

      response = client.terminate_workflow(name)
@@ -1368,6 +1368,9 @@
  tmpfs_size = resources["tmpfs_size"]
  tmpfs_path = resources["tmpfs_path"]
  tmpfs_tempdir = resources["tmpfs_tempdir"]
+ # Set shared_memory to 0 if it isn't specified. This results
+ # in Kubernetes using it's default value when the pod is created.
+ shared_memory = resources.get("shared_memory", 0)

  tmpfs_enabled = use_tmpfs or (tmpfs_size and not use_tmpfs)

@@ -1412,6 +1415,7 @@
      medium="Memory",
      size_limit=tmpfs_size if tmpfs_enabled else 0,
  )
+ .empty_dir_volume("dhsm", medium="Memory", size_limit=shared_memory)
  .pvc_volumes(resources.get("persistent_volume_claims"))
  # Set node selectors
  .node_selectors(resources.get("node_selector"))
@@ -1505,6 +1509,17 @@
          if tmpfs_enabled
          else []
      )
+     # Support shared_memory
+     + (
+         [
+             kubernetes_sdk.V1VolumeMount(
+                 name="dhsm",
+                 mount_path="/dev/shm",
+             )
+         ]
+         if shared_memory
+         else []
+     )
      # Support persistent volume claims.
      + (
          [
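
The two hunks above wire a shared_memory resource into the generated Argo template: a Memory-medium emptyDir named "dhsm", sized by the shared_memory value and mounted at /dev/shm. A hedged usage sketch, assuming shared_memory (in MB) is exposed as a @kubernetes decorator argument via the kubernetes_decorator.py change listed in this release:

    from metaflow import FlowSpec, kubernetes, step


    class SharedMemoryFlow(FlowSpec):
        # "shared_memory" mirrors the resources["shared_memory"] key used above;
        # the exact decorator argument name is an assumption, not shown in this hunk.
        @kubernetes(memory=8000, shared_memory=2000)
        @step
        def start(self):
            # DataLoader-style multiprocessing relies on /dev/shm, which is
            # backed by the "dhsm" emptyDir mounted by the template above.
            self.next(self.end)

        @step
        def end(self):
            pass


    if __name__ == "__main__":
        SharedMemoryFlow()
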
@@ -1525,7 +1540,6 @@

  # Return exit hook templates for workflow execution notifications.
  def _exit_hook_templates(self):
-     # TODO: Add details to slack message
      templates = []
      if self.notify_on_error:
          templates.append(self._slack_error_template())
@@ -1634,36 +1648,100 @@

      return links

+ def _get_slack_blocks(self, message):
+     """
+     Use Slack's Block Kit to add general information about the environment and
+     execution metadata, including a link to the UI and an optional message.
+     """
+     ui_link = "%s%s/argo-{{workflow.name}}" % (UI_URL, self.flow.name)
+     # fmt: off
+     if getattr(current, "project_name", None):
+         # Add @project metadata when available.
+         environment_details_block = {
+             "type": "section",
+             "text": {
+                 "type": "mrkdwn",
+                 "text": ":metaflow: Environment details"
+             },
+             "fields": [
+                 {
+                     "type": "mrkdwn",
+                     "text": "*Project:* %s" % current.project_name
+                 },
+                 {
+                     "type": "mrkdwn",
+                     "text": "*Project Branch:* %s" % current.branch_name
+                 }
+             ]
+         }
+     else:
+         environment_details_block = {
+             "type": "section",
+             "text": {
+                 "type": "mrkdwn",
+                 "text": ":metaflow: Environment details"
+             }
+         }
+
+     blocks = [
+         environment_details_block,
+         {
+             "type": "context",
+             "elements": [
+                 {
+                     "type": "mrkdwn",
+                     "text": " :information_source: *<%s>*" % ui_link,
+                 }
+             ],
+         },
+         {
+             "type": "divider"
+         },
+     ]
+
+     if message:
+         blocks += [
+             {
+                 "type": "section",
+                 "text": {
+                     "type": "mrkdwn",
+                     "text": message
+                 }
+             }
+         ]
+     # fmt: on
+     return blocks
+
  def _slack_error_template(self):
      if self.notify_slack_webhook_url is None:
          return None
+
+     message = (
+         ":rotating_light: _%s/argo-{{workflow.name}}_ failed!" % self.flow.name
+     )
+     payload = {"text": message}
+     if UI_URL:
+         blocks = self._get_slack_blocks(message)
+         payload = {"text": message, "blocks": blocks}
+
      return Template("notify-slack-on-error").http(
-         Http("POST")
-         .url(self.notify_slack_webhook_url)
-         .body(
-             json.dumps(
-                 {
-                     "text": ":rotating_light: _%s/argo-{{workflow.name}}_ failed!"
-                     % self.flow.name
-                 }
-             )
-         )
+         Http("POST").url(self.notify_slack_webhook_url).body(json.dumps(payload))
      )

  def _slack_success_template(self):
      if self.notify_slack_webhook_url is None:
          return None
+
+     message = (
+         ":white_check_mark: _%s/argo-{{workflow.name}}_ succeeded!" % self.flow.name
+     )
+     payload = {"text": message}
+     if UI_URL:
+         blocks = self._get_slack_blocks(message)
+         payload = {"text": message, "blocks": blocks}
+
      return Template("notify-slack-on-success").http(
-         Http("POST")
-         .url(self.notify_slack_webhook_url)
-         .body(
-             json.dumps(
-                 {
-                     "text": ":white_check_mark: _%s/argo-{{workflow.name}}_ succeeded!"
-                     % self.flow.name
-                 }
-             )
-         )
+         Http("POST").url(self.notify_slack_webhook_url).body(json.dumps(payload))
      )

  def _compile_sensor(self):
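
With UI_URL configured, the webhook body produced by _slack_error_template now carries Block Kit content in addition to the plain-text fallback. A hand-built sketch of the resulting payload (hypothetical flow name and UI URL; the @project fields are added only when a project is set):

    payload = {
        "text": ":rotating_light: _HelloFlow/argo-{{workflow.name}}_ failed!",
        "blocks": [
            {   # environment details section
                "type": "section",
                "text": {"type": "mrkdwn", "text": ":metaflow: Environment details"},
            },
            {   # link back to the Metaflow UI
                "type": "context",
                "elements": [
                    {
                        "type": "mrkdwn",
                        "text": " :information_source: *<https://ui.example.com/HelloFlow/argo-{{workflow.name}}>*",
                    }
                ],
            },
            {"type": "divider"},
            {   # the failure message itself
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": ":rotating_light: _HelloFlow/argo-{{workflow.name}}_ failed!",
                },
            },
        ],
    }
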
metaflow/plugins/aws/batch/batch.py CHANGED
@@ -11,6 +11,7 @@ from metaflow.plugins.datatools.s3.s3tail import S3Tail
  from metaflow.plugins.aws.aws_utils import sanitize_batch_tag
  from metaflow.exception import MetaflowException
  from metaflow.metaflow_config import (
+     OTEL_ENDPOINT,
      SERVICE_INTERNAL_URL,
      DATATOOLS_S3ROOT,
      DATASTORE_SYSROOT_S3,
@@ -255,6 +256,7 @@ class Batch(object):
      .environment_variable("METAFLOW_DEFAULT_DATASTORE", "s3")
      .environment_variable("METAFLOW_DEFAULT_METADATA", DEFAULT_METADATA)
      .environment_variable("METAFLOW_CARD_S3ROOT", CARD_S3ROOT)
+     .environment_variable("METAFLOW_OTEL_ENDPOINT", OTEL_ENDPOINT)
      .environment_variable("METAFLOW_RUNTIME_ENVIRONMENT", "aws-batch")
  )

metaflow/plugins/aws/batch/batch_client.py CHANGED
@@ -271,7 +271,7 @@ class BatchJob(object):
  {
      "containerPath": "/dev/neuron{}".format(i),
      "hostPath": "/dev/neuron{}".format(i),
-     "permissions": ["read", "write"],
+     "permissions": ["READ", "WRITE"],
  }
  )

@@ -344,7 +344,15 @@
          "Invalid efa value: ({}) (should be 0 or greater)".format(efa)
      )
  else:
-     job_definition["containerProperties"]["linuxParameters"]["devices"] = []
+     if "linuxParameters" not in job_definition["containerProperties"]:
+         job_definition["containerProperties"]["linuxParameters"] = {}
+     if (
+         "devices"
+         not in job_definition["containerProperties"]["linuxParameters"]
+     ):
+         job_definition["containerProperties"]["linuxParameters"][
+             "devices"
+         ] = []
  if (num_parallel or 0) > 1:
      # Multi-node parallel jobs require the container path and permissions explicitly specified in Job definition
      for i in range(int(efa)):
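
The new else-branch just makes sure the nested linuxParameters/devices keys exist before EFA devices are appended further down; functionally it is equivalent to this shorter sketch:

    container_props = job_definition["containerProperties"]
    container_props.setdefault("linuxParameters", {}).setdefault("devices", [])
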
metaflow/plugins/aws/step_functions/dynamo_db_client.py CHANGED
@@ -1,5 +1,8 @@
  import os
+ import time
+
  import requests
+
  from metaflow.metaflow_config import SFN_DYNAMO_DB_TABLE


@@ -25,12 +28,31 @@ class DynamoDbClient(object):
  def save_parent_task_id_for_foreach_join(
      self, foreach_split_task_id, foreach_join_parent_task_id
  ):
-     return self._client.update_item(
-         TableName=self.name,
-         Key={"pathspec": {"S": foreach_split_task_id}},
-         UpdateExpression="ADD parent_task_ids_for_foreach_join :val",
-         ExpressionAttributeValues={":val": {"SS": [foreach_join_parent_task_id]}},
-     )
+     ex = None
+     for attempt in range(10):
+         try:
+             return self._client.update_item(
+                 TableName=self.name,
+                 Key={"pathspec": {"S": foreach_split_task_id}},
+                 UpdateExpression="ADD parent_task_ids_for_foreach_join :val",
+                 ExpressionAttributeValues={
+                     ":val": {"SS": [foreach_join_parent_task_id]}
+                 },
+             )
+         except self._client.exceptions.ClientError as error:
+             ex = error
+             if (
+                 error.response["Error"]["Code"]
+                 == "ProvisionedThroughputExceededException"
+             ):
+                 # hopefully, enough time for AWS to scale up! otherwise
+                 # ensure sufficient on-demand throughput for dynamo db
+                 # is provisioned ahead of time
+                 sleep_time = min((2**attempt) * 10, 60)
+                 time.sleep(sleep_time)
+             else:
+                 raise
+     raise ex

  def get_parent_task_ids_for_foreach_join(self, foreach_split_task_id):
      response = self._client.get_item(
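
The retry loop above caps the back-off at one minute, so a fully throttled call sleeps 10, 20, 40 and then 60 seconds per remaining attempt before the last error is re-raised. A quick way to check the schedule:

    # Back-off schedule used by save_parent_task_id_for_foreach_join above.
    schedule = [min((2 ** attempt) * 10, 60) for attempt in range(10)]
    print(schedule)       # [10, 20, 40, 60, 60, 60, 60, 60, 60, 60]
    print(sum(schedule))  # 490 seconds (~8 minutes) of sleeping in the worst case
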
metaflow/plugins/aws/step_functions/production_token.py CHANGED
@@ -1,5 +1,5 @@
- import os
  import json
+ import os
  import random
  import string
  import zlib