PyPI - paasta-tools - Versions diffs - 1.21.4__py3-none-any.whl → 1.23.0__py3-none-any.whl - Mend

paasta-tools 1.21.4__py3-none-any.whl → 1.23.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (73) hide show

paasta_tools/__init__.py CHANGED Viewed

@@ -17,4 +17,4 @@
 # setup phase, the dependencies may not exist on disk yet.
 #
 # Don't bump version manually. See `make release` docs in ./Makefile
-__version__ = "1.21.4"
+__version__ = "1.23.0"

paasta_tools/api/api.py CHANGED Viewed

@@ -189,6 +189,11 @@ def make_app(global_config=None):
         "/v1/service_autoscaler/pause",
         request_method="GET",
     )
+    config.add_route(
+        "service_autoscaler.autoscaling_override.post",
+        "/v1/service_autoscaler/{service}/{instance}/autoscaling_override",
+        request_method="POST",
+    )
     config.add_route(
         "remote_run.start",
         "/v1/remote_run/{service}/{instance}/start",

paasta_tools/api/api_docs/swagger.json CHANGED Viewed

@@ -279,6 +279,79 @@
                 "operationId": "delete_service_autoscaler_pause"
             }
         },
+        "/service_autoscaler/{service}/{instance}/autoscaling_override": {
+            "post": {
+                "operationId": "set_autoscaling_override",
+                "parameters": [
+                    {
+                        "description": "Service name",
+                        "in": "path",
+                        "name": "service",
+                        "required": true,
+                        "type": "string"
+                    },
+                    {
+                        "description": "Instance name",
+                        "in": "path",
+                        "name": "instance",
+                        "required": true,
+                        "type": "string"
+                    },
+                    {
+                        "in": "body",
+                        "name": "json_body",
+                        "required": true,
+                        "schema": {
+                            "$ref": "#/definitions/AutoscalingOverride"
+                        }
+                    }
+                ],
+                "responses": {
+                    "202": {
+                        "description": "Successfully set autoscaling override",
+                        "schema": {
+                            "type": "object",
+                            "properties": {
+                                "service": {
+                                    "type": "string",
+                                    "description": "Service name"
+                                },
+                                "instance": {
+                                    "type": "string",
+                                    "description": "Instance name"
+                                },
+                                "min_instances": {
+                                    "type": "integer",
+                                    "description": "Minimum number of instances to run"
+                                },
+                                "expire_after": {
+                                    "type": "number",
+                                    "format": "float",
+                                    "description": "Unix timestamp after which the override is no longer valid"
+                                },
+                                "status": {
+                                    "type": "string",
+                                    "description": "Status of the operation"
+                                }
+                            }
+                        }
+                    },
+                    "400": {
+                        "description": "Invalid request parameters"
+                    },
+                    "501": {
+                        "description": "Autoscaling not supported for this instance type"
+                    },
+                    "500": {
+                        "description": "Failed to set autoscaling override"
+                    }
+                },
+                "summary": "Set a temporary autoscaling override for a service instance",
+                "tags": [
+                    "autoscaler"
+                ]
+            }
+        },
         "/resources/utilization": {
             "get": {
                 "responses": {
@@ -964,6 +1037,9 @@
                     "404": {
                         "description": "Service instance not found"
                     },
+                    "409": {
+                        "description": "A pod was found but is currently being terminated"
+                    },
                     "500": {
                         "description": "Failure"
                     }
@@ -1627,6 +1703,25 @@
                 }
             }
         },
+        "AutoscalingOverride": {
+            "type": "object",
+            "properties": {
+                "min_instances": {
+                    "type": "integer",
+                    "description": "Minimum number of instances to run",
+                    "minimum": 1
+                },
+                "expire_after": {
+                    "type": "number",
+                    "format": "float",
+                    "description": "Unix timestamp when this override is no longer valid"
+                }
+            },
+            "required": [
+                "min_instances",
+                "expire_after"
+            ]
+        },
         "KubernetesReplicaSet": {
             "type": "object",
             "properties": {

paasta_tools/api/views/autoscaler.py CHANGED Viewed

@@ -15,13 +15,27 @@
 """
 PaaSTA service list (instances) etc.
 """
+import json
+import logging
+from datetime import datetime
+from datetime import timezone
+from typing import Tuple
+from kubernetes.client import V1ConfigMap
 from pyramid.response import Response
 from pyramid.view import view_config
 from paasta_tools.api import settings
 from paasta_tools.api.views.exception import ApiFailure
 from paasta_tools.cli.utils import get_instance_config
+from paasta_tools.kubernetes_tools import AUTOSCALING_OVERRIDES_CONFIGMAP_NAME
+from paasta_tools.kubernetes_tools import AUTOSCALING_OVERRIDES_CONFIGMAP_NAMESPACE
+from paasta_tools.kubernetes_tools import get_or_create_namespaced_configmap
 from paasta_tools.kubernetes_tools import KubernetesDeploymentConfig
+from paasta_tools.kubernetes_tools import patch_namespaced_configmap
+log = logging.getLogger(__name__)
 @view_config(route_name="service.autoscaler.get", request_method="GET", renderer="json")
@@ -98,3 +112,111 @@ def update_autoscaler_count(request):
     response_body = {"desired_instances": desired_instances, "status": status}
     return Response(json_body=response_body, status_code=202)
+def get_or_create_autoscaling_overrides_configmap() -> Tuple[V1ConfigMap, bool]:
+    return get_or_create_namespaced_configmap(
+        AUTOSCALING_OVERRIDES_CONFIGMAP_NAME,
+        namespace=AUTOSCALING_OVERRIDES_CONFIGMAP_NAMESPACE,
+        kube_client=settings.kubernetes_client,
+    )
+@view_config(
+    route_name="service_autoscaler.autoscaling_override.post",
+    request_method="POST",
+    renderer="json",
+)
+def set_autoscaling_override(request):
+    """Set a temporary autoscaling override for a service/instance.
+    This endpoint creates or updates a ConfigMap entry with override information
+    including expiration time. The override will be applied by the autoscaler.
+    Required parameters:
+    - service: The service name
+    - instance: The instance name
+    - min_instances: The minimum number of instances to enforce
+    - expires_after: unix timestamp after which the override is no longer valid
+    """
+    service = request.swagger_data.get("service")
+    instance = request.swagger_data.get("instance")
+    cluster = settings.cluster
+    soa_dir = settings.soa_dir
+    instance_config = get_instance_config(
+        service, instance, cluster, soa_dir, load_deployments=False
+    )
+    if not isinstance(instance_config, KubernetesDeploymentConfig):
+        error_message = (
+            f"Autoscaling is not supported for {service}.{instance} because instance type is not "
+            f"kubernetes."
+        )
+        raise ApiFailure(error_message, 501)
+    json_body = request.swagger_data.get("json_body", {})
+    min_instances_override = json_body.get("min_instances")
+    expire_after = json_body.get("expire_after")
+    if not isinstance(min_instances_override, int) or min_instances_override < 1:
+        raise ApiFailure("min_instances must be a positive integer", 400)
+    if not expire_after:
+        raise ApiFailure("expire_after is required", 400)
+    max_instances = instance_config.get_max_instances()
+    if max_instances is None:
+        raise ApiFailure(f"Autoscaling is not enabled for {service}.{instance}", 400)
+    if max_instances < min_instances_override:
+        raise ApiFailure(
+            f"min_instances ({min_instances_override}) cannot be greater than max_instances ({max_instances})",
+            400,
+        )
+    configmap, created = get_or_create_autoscaling_overrides_configmap()
+    if created:
+        log.info("Created new autoscaling overrides ConfigMap")
+    # i dunno why this is necessary, but a newly created configmap doesn't have a data field
+    # even when we set it in the create call
+    if not configmap.data:
+        configmap.data = {}
+    override_data = {
+        "min_instances": min_instances_override,
+        "created_at": datetime.now(timezone.utc).isoformat(),
+        # NOTE: we may want to also allow setting a max_instances override in the future, but if we do that
+        # we'd probably want to force folks to either set one or both and share the same expiration time
+        "expire_after": expire_after,
+    }
+    service_instance = f"{service}.{instance}"
+    existing_overrides = (
+        json.loads(configmap.data[service_instance])
+        if service_instance in configmap.data
+        else {}
+    )
+    merged_overrides = {**existing_overrides, **override_data}
+    serialized_overrides = json.dumps(merged_overrides)
+    patch_namespaced_configmap(
+        name=AUTOSCALING_OVERRIDES_CONFIGMAP_NAME,
+        namespace=AUTOSCALING_OVERRIDES_CONFIGMAP_NAMESPACE,
+        # this should only update the single entry for the $service.$instance key
+        # ain't k8s grand?
+        body={"data": {service_instance: serialized_overrides}},
+        kube_client=settings.kubernetes_client,
+    )
+    response_body = {
+        "service": service,
+        "instance": instance,
+        "cluster": cluster,
+        "min_instances": min_instances_override,
+        "expire_after": expire_after,
+        "status": "SUCCESS",
+    }
+    # NOTE: this is an HTTP 202 since actually updating the HPA happens asynchronously
+    # through setup_kubernetes_job
+    # XXX: should we try to patch things here as well?
+    return Response(json_body=response_body, status_code=202)

paasta_tools/cli/cmds/autoscale.py CHANGED Viewed

@@ -13,6 +13,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import logging
+import time
+from datetime import datetime
+from typing import Optional
+import pytz
+from pytimeparse.timeparse import timeparse
 import paasta_tools.paastaapi.models as paastamodels
 from paasta_tools.api import client
@@ -58,6 +64,24 @@ def add_subparser(subparsers):
     autoscale_parser.add_argument(
         "--set", help="Set the number to scale to. Must be an Int.", type=int
     )
+    # Temporary override options
+    override_group = autoscale_parser.add_argument_group("Temporary Override Options")
+    override_group.add_argument(
+        "--set-min",
+        help="Set the minimum number of replicas (must be >= 1). Requires --for parameter.",
+        type=lambda x: int(x)
+        if int(x) >= 1
+        else autoscale_parser.error("Minimum instances must be >= 1"),
+        default=None,
+    )
+    override_group.add_argument(
+        "--for",
+        dest="duration",
+        help="Duration for the temporary override (e.g. '3h', '30m'). Required when using --set-min.",
+        default=None,
+    )
     autoscale_parser.add_argument(
         "-d",
         "--soa-dir",
@@ -69,9 +93,46 @@ def add_subparser(subparsers):
     autoscale_parser.set_defaults(command=paasta_autoscale)
+def parse_duration_to_seconds(duration: str) -> Optional[int]:
+    """Parse a duration string like '3h' or '30m' into seconds.
+    Args:
+        duration_str: A string representing a duration (e.g., "3h", "30m", "1d")
+    Returns:
+        The duration in seconds, or None if parsing failed
+    """
+    if not duration:
+        return None
+    seconds = timeparse(duration)
+    return seconds
 def paasta_autoscale(args):
     log.setLevel(logging.DEBUG)
     service = figure_out_service_name(args)
+    if args.set_min is not None and not args.duration:
+        print(
+            PaastaColors.yellow(
+                "WARNING: --set-min requires --for parameter to specify duration - defaulting to 30m"
+            )
+        )
+        args.duration = "30m"
+    if args.duration is not None and args.set_min is None:
+        print(PaastaColors.red("Error: --for requires --set-min parameter"))
+        return 1
+    if args.set is not None and args.set_min is not None:
+        print(
+            PaastaColors.red(
+                "Error: Cannot use both --set and --set-min at the same time"
+            )
+        )
+        return 1
     instance_config = next(
         get_instance_configs_for_service(
             service=service,
@@ -83,7 +144,7 @@ def paasta_autoscale(args):
     )
     if not instance_config:
         print(
-            "Could not find config files for this service instance in soaconfigs. Maybe you mispelled an argument?"
+            "Could not find config files for this service instance in soaconfigs. Maybe you misspelled an argument?"
         )
         return 1
@@ -99,12 +160,15 @@ def paasta_autoscale(args):
         return 1
     try:
-        if args.set is None:
+        # get current autoscaler count
+        if args.set is None and args.set_min is None:
             log.debug("Getting the current autoscaler count...")
             res, status, _ = api.autoscaler.get_autoscaler_count(
                 service=service, instance=args.instance, _return_http_data_only=False
             )
-        else:
+        # set desired instances
+        elif args.set is not None:
             log.debug(f"Setting desired instances to {args.set}.")
             msg = paastamodels.AutoscalerCountMsg(desired_instances=int(args.set))
             res, status, _ = api.autoscaler.update_autoscaler_count(
@@ -121,16 +185,53 @@ def paasta_autoscale(args):
                 instance=args.instance,
                 cluster=args.cluster,
             )
+        # set lower bound
+        elif args.set_min is not None:
+            duration_seconds = parse_duration_to_seconds(args.duration)
+            if not duration_seconds:
+                print(
+                    PaastaColors.red(
+                        f"Error: Invalid duration format '{args.duration}'. "
+                        f"Please use a format like '3h' or '30m'."
+                    )
+                )
+                return 1
+            # NOTE: this is explicitly using time.time() since we're doing everything using epoch time
+            # for simplicity
+            expiration_time = time.time() + duration_seconds
+            log.debug(
+                f"Setting minimum instances to {args.set_min} for duration {args.duration}."
+            )
+            msg = paastamodels.AutoscalingOverride(
+                min_instances=args.set_min,
+                expire_after=expiration_time,
+            )
+            res, status, _ = api.autoscaler.set_autoscaling_override(
+                service=service,
+                instance=args.instance,
+                autoscaling_override=msg,
+                _return_http_data_only=False,
+            )
+            _log_audit(
+                action="manual-autoscale-override",
+                action_details=str(msg),
+                service=service,
+                instance=args.instance,
+                cluster=args.cluster,
+            )
     except api.api_error as exc:
         status = exc.status
     if not 200 <= status <= 299:
         print(
             PaastaColors.red(
-                f"ERROR: '{args.instance}' is not configured to autoscale, "
-                f"so paasta autoscale could not scale it up on demand. "
+                f"ERROR: '{args.instance}' is not configured to autoscale OR you set min_instances above the current max_instances, "
+                f"and `paasta autoscale` could not update it. "
                 f"If you want to be able to boost this service, please configure autoscaling for the service "
-                f"in its config file by setting min and max instances. Example: \n"
+                f"in its config file by setting min and max instances appropriately. Example: \n"
                 f"{args.instance}:\n"
                 f"     min_instances: 5\n"
                 f"     max_instances: 50"
@@ -139,5 +240,23 @@ def paasta_autoscale(args):
         return 0
     log.debug(f"Res: {res} Http: {status}")
-    print(res.desired_instances)
+    if not args.set_min:
+        print(f"Desired instances: {res.desired_instances}")
+    elif args.set_min:
+        print(
+            f"Temporary override set for {args.service}.{args.instance} with minimum instances: {args.set_min}"
+        )
+        # folks using this might be in different timezones, so let's convert the expiration time to a few common ones
+        # to make it extra clear when the override will expire
+        epoch_time = datetime.fromtimestamp(res.expire_after)
+        eastern_time = epoch_time.astimezone(pytz.timezone("US/Eastern"))
+        pacific_time = epoch_time.astimezone(pytz.timezone("US/Pacific"))
+        london_time = epoch_time.astimezone(pytz.timezone("Europe/London"))
+        time_format = "%Y-%m-%d %H:%M:%S %Z%z"
+        print(f"The {args.duration} override will expire at:")
+        print(f"Eastern Time: {eastern_time.strftime(time_format)}")
+        print(f"Pacific Time: {pacific_time.strftime(time_format)}")
+        print(f"London Time:  {london_time.strftime(time_format)}")
     return 0

paasta_tools/cli/cmds/remote_run.py CHANGED Viewed

@@ -14,6 +14,8 @@
 # limitations under the License.
 import argparse
 import shutil
+import subprocess
+import sys
 import time
 from typing import List
@@ -32,9 +34,12 @@ from paasta_tools.utils import load_system_paasta_config
 from paasta_tools.utils import SystemPaastaConfig
-KUBECTL_CMD_TEMPLATE = (
+KUBECTL_EXEC_CMD_TEMPLATE = (
     "{kubectl_wrapper} --token {token} exec -it -n {namespace} {pod} -- /bin/bash"
 )
+KUBECTL_CP_CMD_TEMPLATE = (
+    "{kubectl_wrapper} --token {token} -n {namespace} cp {filename} {pod}:/tmp/"
+)
 def _list_services_and_toolboxes() -> List[str]:
@@ -55,7 +60,9 @@ def _list_services_and_toolboxes() -> List[str]:
 def paasta_remote_run_start(
     args: argparse.Namespace,
     system_paasta_config: SystemPaastaConfig,
+    recursed: bool = False,
 ) -> int:
+    status_prefix = "\x1b[2K\r"  # Clear line, carriage return
     client = get_paasta_oapi_client_with_auth(
         cluster=get_paasta_oapi_api_clustername(cluster=args.cluster, is_eks=True),
         system_paasta_config=system_paasta_config,
@@ -95,10 +102,18 @@ def paasta_remote_run_start(
         if poll_response.status == 200:
             print("")
             break
-        print(f"\rStatus: {poll_response.message}", end="")
+        print(f"{status_prefix}Status: {poll_response.message}", end="")
+        if poll_response.status == 404:
+            # Probably indicates a pod was terminating. Now that its gone, retry the whole process
+            if not recursed:
+                print("\nPod finished terminating. Rerunning")
+                return paasta_remote_run_start(args, system_paasta_config, True)
+            else:
+                print("\nSomething went wrong. Pod still not found.")
+                return 1
         time.sleep(10)
     else:
-        print("Timed out while waiting for job to start")
+        print(f"{status_prefix}Timed out while waiting for job to start")
         return 1
     if not args.interactive and not args.toolbox:
@@ -120,13 +135,28 @@ def paasta_remote_run_start(
         kubectl_wrapper = f"kubectl-eks-{args.cluster}"
         if not shutil.which(kubectl_wrapper):
             kubectl_wrapper = f"kubectl-{args.cluster}"
-        exec_command = KUBECTL_CMD_TEMPLATE.format(
+        exec_command = KUBECTL_EXEC_CMD_TEMPLATE.format(
             kubectl_wrapper=kubectl_wrapper,
             namespace=poll_response.namespace,
             pod=poll_response.pod_name,
             token=token_response.token,
         )
+    if args.copy_file:
+        for filename in args.copy_file:
+            cp_command = KUBECTL_CP_CMD_TEMPLATE.format(
+                kubectl_wrapper=kubectl_wrapper,
+                namespace=poll_response.namespace,
+                pod=poll_response.pod_name,
+                filename=filename,
+                token=token_response.token,
+            ).split(" ")
+            call = subprocess.run(cp_command, capture_output=True)
+            if call.returncode != 0:
+                print("Error copying file to remote-run pod: ", file=sys.stderr)
+                print(call.stderr.decode("utf-8"), file=sys.stderr)
+                return 1
     run_interactive_cli(exec_command)
     return 0
@@ -232,6 +262,12 @@ def add_subparser(subparsers: argparse._SubParsersAction) -> None:
         type=int,
         default=600,
     )
+    start_parser.add_argument(
+        "--copy-file",
+        help="Adds a local file to /tmp inside the pod",
+        type=str,
+        action="append",
+    )
     stop_parser = subparsers.add_parser(
         "stop",
         help="Stop your remote-run job if it exists",

paasta_tools/cli/cmds/spark_run.py CHANGED Viewed

@@ -375,6 +375,20 @@ def add_subparser(subparsers):
         default=False,
     )
+    list_parser.add_argument(
+        "--jira-ticket",
+        help=(
+            "The top level jira ticket used to track the project that this spark-job is related to. "
+            "eg: --jira-ticket=PROJ-123. "
+            "Must be passed for all adhoc jobs. "
+            "See https://yelpwiki.yelpcorp.com/spaces/AML/pages/402885641. "
+        ),
+        type=str,
+        required=False,
+        dest="jira_ticket",
+        default=None,
+    )
     aws_group = list_parser.add_argument_group(
         title="AWS credentials options",
         description="If --aws-credentials-yaml is specified, it overrides all "
@@ -1383,6 +1397,7 @@ def paasta_spark_run(args: argparse.Namespace) -> int:
         use_eks=True,
         k8s_server_address=k8s_server_address,
         service_account_name=service_account_name,
+        jira_ticket=args.jira_ticket,
     )
     return configure_and_run_docker_container(

paasta_tools/kubernetes/application/controller_wrappers.py CHANGED Viewed

@@ -18,6 +18,7 @@ from paasta_tools.kubernetes_tools import create_job
 from paasta_tools.kubernetes_tools import create_pod_disruption_budget
 from paasta_tools.kubernetes_tools import create_stateful_set
 from paasta_tools.kubernetes_tools import ensure_service_account
+from paasta_tools.kubernetes_tools import HpaOverride
 from paasta_tools.kubernetes_tools import KubeClient
 from paasta_tools.kubernetes_tools import KubeDeployment
 from paasta_tools.kubernetes_tools import KubernetesDeploymentConfig
@@ -219,6 +220,15 @@ class Application(ABC):
 class DeploymentWrapper(Application):
+    def __init__(
+        self,
+        item: Union[V1Deployment, V1StatefulSet],
+        logging=logging.getLogger(__name__),
+        hpa_override: Optional[HpaOverride] = None,
+    ) -> None:
+        super().__init__(item, logging)
+        self.hpa_override = hpa_override
     def deep_delete(
         self, kube_client: KubeClient, propagation_policy="Foreground"
     ) -> None:
@@ -290,6 +300,9 @@ class DeploymentWrapper(Application):
             cluster=self.soa_config.cluster,
             kube_client=kube_client,
             namespace=self.item.metadata.namespace,
+            min_instances_override=(
+                self.hpa_override["min_instances"] if self.hpa_override else None
+            ),
         )
         hpa_exists = self.exists_hpa(kube_client)
@@ -461,11 +474,12 @@ class JobWrapper(Application):
 def get_application_wrapper(
-    formatted_application: Union[V1Deployment, V1StatefulSet, V1Job]
+    formatted_application: Union[V1Deployment, V1StatefulSet, V1Job],
+    hpa_override: Optional[HpaOverride] = None,
 ) -> Application:
     app: Application
     if isinstance(formatted_application, V1Deployment):
-        app = DeploymentWrapper(formatted_application)
+        app = DeploymentWrapper(formatted_application, hpa_override=hpa_override)
     elif isinstance(formatted_application, V1StatefulSet):
         app = StatefulSetWrapper(formatted_application)
     elif isinstance(formatted_application, V1Job):

paasta_tools/kubernetes/remote_run.py CHANGED Viewed

@@ -189,6 +189,8 @@ def remote_run_ready(
     if not pod:
         return {"status": 404, "message": "No pod found"}
     if pod.status.phase == "Running":
+        if pod.metadata.deletion_timestamp:
+            return {"status": 409, "message": "Pod is terminating"}
         result: RemoteRunOutcome = {
             "status": 200,
             "message": "Pod ready",

paasta-tools 1.21.4__py3-none-any.whl → 1.23.0__py3-none-any.whl

paasta-tools 1.21.4__py3-none-any.whl → 1.23.0__py3-none-any.whl