paasta-tools 1.21.4__py3-none-any.whl → 1.23.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. paasta_tools/__init__.py +1 -1
  2. paasta_tools/api/api.py +5 -0
  3. paasta_tools/api/api_docs/swagger.json +95 -0
  4. paasta_tools/api/views/autoscaler.py +122 -0
  5. paasta_tools/cli/cmds/autoscale.py +126 -7
  6. paasta_tools/cli/cmds/remote_run.py +40 -4
  7. paasta_tools/cli/cmds/spark_run.py +15 -0
  8. paasta_tools/kubernetes/application/controller_wrappers.py +16 -2
  9. paasta_tools/kubernetes/remote_run.py +2 -0
  10. paasta_tools/kubernetes_tools.py +77 -1
  11. paasta_tools/paastaapi/api/autoscaler_api.py +140 -1
  12. paasta_tools/paastaapi/model/autoscaling_override.py +180 -0
  13. paasta_tools/paastaapi/model/inline_response202.py +182 -0
  14. paasta_tools/paastaapi/models/__init__.py +2 -0
  15. paasta_tools/setup_kubernetes_job.py +105 -1
  16. {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/setup_kubernetes_job.py +105 -1
  17. {paasta_tools-1.21.4.dist-info → paasta_tools-1.23.0.dist-info}/METADATA +2 -2
  18. {paasta_tools-1.21.4.dist-info → paasta_tools-1.23.0.dist-info}/RECORD +73 -71
  19. {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/apply_external_resources.py +0 -0
  20. {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/bounce_log_latency_parser.py +0 -0
  21. {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/check_autoscaler_max_instances.py +0 -0
  22. {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/check_cassandracluster_services_replication.py +0 -0
  23. {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/check_flink_services_health.py +0 -0
  24. {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/check_kubernetes_api.py +0 -0
  25. {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/check_kubernetes_services_replication.py +0 -0
  26. {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/check_manual_oapi_changes.sh +0 -0
  27. {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/check_oom_events.py +0 -0
  28. {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/check_orphans.py +0 -0
  29. {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/check_spark_jobs.py +0 -0
  30. {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/cleanup_kubernetes_cr.py +0 -0
  31. {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/cleanup_kubernetes_crd.py +0 -0
  32. {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/cleanup_kubernetes_jobs.py +0 -0
  33. {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/create_dynamodb_table.py +0 -0
  34. {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/create_paasta_playground.py +0 -0
  35. {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/delete_kubernetes_deployments.py +0 -0
  36. {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/emit_allocated_cpu_metrics.py +0 -0
  37. {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/generate_all_deployments +0 -0
  38. {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/generate_authenticating_services.py +0 -0
  39. {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/generate_deployments_for_service.py +0 -0
  40. {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/generate_services_file.py +0 -0
  41. {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/generate_services_yaml.py +0 -0
  42. {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/get_running_task_allocation.py +0 -0
  43. {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/habitat_fixer.py +0 -0
  44. {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/ide_helper.py +0 -0
  45. {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/is_pod_healthy_in_proxy.py +0 -0
  46. {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/is_pod_healthy_in_smartstack.py +0 -0
  47. {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/kill_bad_containers.py +0 -0
  48. {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/kubernetes_remove_evicted_pods.py +0 -0
  49. {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/mass-deploy-tag.sh +0 -0
  50. {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/mock_patch_checker.py +0 -0
  51. {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/paasta_cleanup_remote_run_resources.py +0 -0
  52. {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/paasta_cleanup_stale_nodes.py +0 -0
  53. {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/paasta_deploy_tron_jobs +0 -0
  54. {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/paasta_execute_docker_command.py +0 -0
  55. {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/paasta_secrets_sync.py +0 -0
  56. {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/paasta_tabcomplete.sh +0 -0
  57. {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/paasta_update_soa_memcpu.py +0 -0
  58. {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/render_template.py +0 -0
  59. {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/rightsizer_soaconfigs_update.py +0 -0
  60. {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/service_shard_remove.py +0 -0
  61. {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/service_shard_update.py +0 -0
  62. {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/setup_istio_mesh.py +0 -0
  63. {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/setup_kubernetes_cr.py +0 -0
  64. {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/setup_kubernetes_crd.py +0 -0
  65. {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/setup_kubernetes_internal_crd.py +0 -0
  66. {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/setup_prometheus_adapter_config.py +0 -0
  67. {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/shared_ip_check.py +0 -0
  68. {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/synapse_srv_namespaces_fact.py +0 -0
  69. {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/timeouts_metrics_prom.py +0 -0
  70. {paasta_tools-1.21.4.dist-info → paasta_tools-1.23.0.dist-info}/LICENSE +0 -0
  71. {paasta_tools-1.21.4.dist-info → paasta_tools-1.23.0.dist-info}/WHEEL +0 -0
  72. {paasta_tools-1.21.4.dist-info → paasta_tools-1.23.0.dist-info}/entry_points.txt +0 -0
  73. {paasta_tools-1.21.4.dist-info → paasta_tools-1.23.0.dist-info}/top_level.txt +0 -0
paasta_tools/__init__.py CHANGED
@@ -17,4 +17,4 @@
17
17
  # setup phase, the dependencies may not exist on disk yet.
18
18
  #
19
19
  # Don't bump version manually. See `make release` docs in ./Makefile
20
- __version__ = "1.21.4"
20
+ __version__ = "1.23.0"
paasta_tools/api/api.py CHANGED
@@ -189,6 +189,11 @@ def make_app(global_config=None):
189
189
  "/v1/service_autoscaler/pause",
190
190
  request_method="GET",
191
191
  )
192
+ config.add_route(
193
+ "service_autoscaler.autoscaling_override.post",
194
+ "/v1/service_autoscaler/{service}/{instance}/autoscaling_override",
195
+ request_method="POST",
196
+ )
192
197
  config.add_route(
193
198
  "remote_run.start",
194
199
  "/v1/remote_run/{service}/{instance}/start",
@@ -279,6 +279,79 @@
279
279
  "operationId": "delete_service_autoscaler_pause"
280
280
  }
281
281
  },
282
+ "/service_autoscaler/{service}/{instance}/autoscaling_override": {
283
+ "post": {
284
+ "operationId": "set_autoscaling_override",
285
+ "parameters": [
286
+ {
287
+ "description": "Service name",
288
+ "in": "path",
289
+ "name": "service",
290
+ "required": true,
291
+ "type": "string"
292
+ },
293
+ {
294
+ "description": "Instance name",
295
+ "in": "path",
296
+ "name": "instance",
297
+ "required": true,
298
+ "type": "string"
299
+ },
300
+ {
301
+ "in": "body",
302
+ "name": "json_body",
303
+ "required": true,
304
+ "schema": {
305
+ "$ref": "#/definitions/AutoscalingOverride"
306
+ }
307
+ }
308
+ ],
309
+ "responses": {
310
+ "202": {
311
+ "description": "Successfully set autoscaling override",
312
+ "schema": {
313
+ "type": "object",
314
+ "properties": {
315
+ "service": {
316
+ "type": "string",
317
+ "description": "Service name"
318
+ },
319
+ "instance": {
320
+ "type": "string",
321
+ "description": "Instance name"
322
+ },
323
+ "min_instances": {
324
+ "type": "integer",
325
+ "description": "Minimum number of instances to run"
326
+ },
327
+ "expire_after": {
328
+ "type": "number",
329
+ "format": "float",
330
+ "description": "Unix timestamp after which the override is no longer valid"
331
+ },
332
+ "status": {
333
+ "type": "string",
334
+ "description": "Status of the operation"
335
+ }
336
+ }
337
+ }
338
+ },
339
+ "400": {
340
+ "description": "Invalid request parameters"
341
+ },
342
+ "501": {
343
+ "description": "Autoscaling not supported for this instance type"
344
+ },
345
+ "500": {
346
+ "description": "Failed to set autoscaling override"
347
+ }
348
+ },
349
+ "summary": "Set a temporary autoscaling override for a service instance",
350
+ "tags": [
351
+ "autoscaler"
352
+ ]
353
+ }
354
+ },
282
355
  "/resources/utilization": {
283
356
  "get": {
284
357
  "responses": {
@@ -964,6 +1037,9 @@
964
1037
  "404": {
965
1038
  "description": "Service instance not found"
966
1039
  },
1040
+ "409": {
1041
+ "description": "A pod was found but is currently being terminated"
1042
+ },
967
1043
  "500": {
968
1044
  "description": "Failure"
969
1045
  }
@@ -1627,6 +1703,25 @@
1627
1703
  }
1628
1704
  }
1629
1705
  },
1706
+ "AutoscalingOverride": {
1707
+ "type": "object",
1708
+ "properties": {
1709
+ "min_instances": {
1710
+ "type": "integer",
1711
+ "description": "Minimum number of instances to run",
1712
+ "minimum": 1
1713
+ },
1714
+ "expire_after": {
1715
+ "type": "number",
1716
+ "format": "float",
1717
+ "description": "Unix timestamp when this override is no longer valid"
1718
+ }
1719
+ },
1720
+ "required": [
1721
+ "min_instances",
1722
+ "expire_after"
1723
+ ]
1724
+ },
1630
1725
  "KubernetesReplicaSet": {
1631
1726
  "type": "object",
1632
1727
  "properties": {
@@ -15,13 +15,27 @@
15
15
  """
16
16
  PaaSTA service list (instances) etc.
17
17
  """
18
+ import json
19
+ import logging
20
+ from datetime import datetime
21
+ from datetime import timezone
22
+ from typing import Tuple
23
+
24
+ from kubernetes.client import V1ConfigMap
18
25
  from pyramid.response import Response
19
26
  from pyramid.view import view_config
20
27
 
21
28
  from paasta_tools.api import settings
22
29
  from paasta_tools.api.views.exception import ApiFailure
23
30
  from paasta_tools.cli.utils import get_instance_config
31
+ from paasta_tools.kubernetes_tools import AUTOSCALING_OVERRIDES_CONFIGMAP_NAME
32
+ from paasta_tools.kubernetes_tools import AUTOSCALING_OVERRIDES_CONFIGMAP_NAMESPACE
33
+ from paasta_tools.kubernetes_tools import get_or_create_namespaced_configmap
24
34
  from paasta_tools.kubernetes_tools import KubernetesDeploymentConfig
35
+ from paasta_tools.kubernetes_tools import patch_namespaced_configmap
36
+
37
+
38
+ log = logging.getLogger(__name__)
25
39
 
26
40
 
27
41
  @view_config(route_name="service.autoscaler.get", request_method="GET", renderer="json")
@@ -98,3 +112,111 @@ def update_autoscaler_count(request):
98
112
 
99
113
  response_body = {"desired_instances": desired_instances, "status": status}
100
114
  return Response(json_body=response_body, status_code=202)
115
+
116
+
117
+ def get_or_create_autoscaling_overrides_configmap() -> Tuple[V1ConfigMap, bool]:
118
+ return get_or_create_namespaced_configmap(
119
+ AUTOSCALING_OVERRIDES_CONFIGMAP_NAME,
120
+ namespace=AUTOSCALING_OVERRIDES_CONFIGMAP_NAMESPACE,
121
+ kube_client=settings.kubernetes_client,
122
+ )
123
+
124
+
125
+ @view_config(
126
+ route_name="service_autoscaler.autoscaling_override.post",
127
+ request_method="POST",
128
+ renderer="json",
129
+ )
130
+ def set_autoscaling_override(request):
131
+ """Set a temporary autoscaling override for a service/instance.
132
+
133
+ This endpoint creates or updates a ConfigMap entry with override information
134
+ including expiration time. The override will be applied by the autoscaler.
135
+
136
+ Required parameters:
137
+ - service: The service name
138
+ - instance: The instance name
139
+ - min_instances: The minimum number of instances to enforce
140
+ - expire_after: unix timestamp after which the override is no longer valid
141
+ """
142
+ service = request.swagger_data.get("service")
143
+ instance = request.swagger_data.get("instance")
144
+ cluster = settings.cluster
145
+ soa_dir = settings.soa_dir
146
+
147
+ instance_config = get_instance_config(
148
+ service, instance, cluster, soa_dir, load_deployments=False
149
+ )
150
+ if not isinstance(instance_config, KubernetesDeploymentConfig):
151
+ error_message = (
152
+ f"Autoscaling is not supported for {service}.{instance} because instance type is not "
153
+ f"kubernetes."
154
+ )
155
+ raise ApiFailure(error_message, 501)
156
+
157
+ json_body = request.swagger_data.get("json_body", {})
158
+ min_instances_override = json_body.get("min_instances")
159
+ expire_after = json_body.get("expire_after")
160
+
161
+ if not isinstance(min_instances_override, int) or min_instances_override < 1:
162
+ raise ApiFailure("min_instances must be a positive integer", 400)
163
+
164
+ if not expire_after:
165
+ raise ApiFailure("expire_after is required", 400)
166
+
167
+ max_instances = instance_config.get_max_instances()
168
+ if max_instances is None:
169
+ raise ApiFailure(f"Autoscaling is not enabled for {service}.{instance}", 400)
170
+
171
+ if max_instances < min_instances_override:
172
+ raise ApiFailure(
173
+ f"min_instances ({min_instances_override}) cannot be greater than max_instances ({max_instances})",
174
+ 400,
175
+ )
176
+
177
+ configmap, created = get_or_create_autoscaling_overrides_configmap()
178
+ if created:
179
+ log.info("Created new autoscaling overrides ConfigMap")
180
+ # i dunno why this is necessary, but a newly created configmap doesn't have a data field
181
+ # even when we set it in the create call
182
+ if not configmap.data:
183
+ configmap.data = {}
184
+
185
+ override_data = {
186
+ "min_instances": min_instances_override,
187
+ "created_at": datetime.now(timezone.utc).isoformat(),
188
+ # NOTE: we may want to also allow setting a max_instances override in the future, but if we do that
189
+ # we'd probably want to force folks to either set one or both and share the same expiration time
190
+ "expire_after": expire_after,
191
+ }
192
+
193
+ service_instance = f"{service}.{instance}"
194
+ existing_overrides = (
195
+ json.loads(configmap.data[service_instance])
196
+ if service_instance in configmap.data
197
+ else {}
198
+ )
199
+ merged_overrides = {**existing_overrides, **override_data}
200
+ serialized_overrides = json.dumps(merged_overrides)
201
+
202
+ patch_namespaced_configmap(
203
+ name=AUTOSCALING_OVERRIDES_CONFIGMAP_NAME,
204
+ namespace=AUTOSCALING_OVERRIDES_CONFIGMAP_NAMESPACE,
205
+ # this should only update the single entry for the $service.$instance key
206
+ # ain't k8s grand?
207
+ body={"data": {service_instance: serialized_overrides}},
208
+ kube_client=settings.kubernetes_client,
209
+ )
210
+
211
+ response_body = {
212
+ "service": service,
213
+ "instance": instance,
214
+ "cluster": cluster,
215
+ "min_instances": min_instances_override,
216
+ "expire_after": expire_after,
217
+ "status": "SUCCESS",
218
+ }
219
+ # NOTE: this is an HTTP 202 since actually updating the HPA happens asynchronously
220
+ # through setup_kubernetes_job
221
+ # XXX: should we try to patch things here as well?
222
+ return Response(json_body=response_body, status_code=202)
@@ -13,6 +13,12 @@
13
13
  # See the License for the specific language governing permissions and
14
14
  # limitations under the License.
15
15
  import logging
16
+ import time
17
+ from datetime import datetime
18
+ from typing import Optional
19
+
20
+ import pytz
21
+ from pytimeparse.timeparse import timeparse
16
22
 
17
23
  import paasta_tools.paastaapi.models as paastamodels
18
24
  from paasta_tools.api import client
@@ -58,6 +64,24 @@ def add_subparser(subparsers):
58
64
  autoscale_parser.add_argument(
59
65
  "--set", help="Set the number to scale to. Must be an Int.", type=int
60
66
  )
67
+
68
+ # Temporary override options
69
+ override_group = autoscale_parser.add_argument_group("Temporary Override Options")
70
+ override_group.add_argument(
71
+ "--set-min",
72
+ help="Set the minimum number of replicas (must be >= 1). Requires --for parameter.",
73
+ type=lambda x: int(x)
74
+ if int(x) >= 1
75
+ else autoscale_parser.error("Minimum instances must be >= 1"),
76
+ default=None,
77
+ )
78
+ override_group.add_argument(
79
+ "--for",
80
+ dest="duration",
81
+ help="Duration for the temporary override (e.g. '3h', '30m'). Required when using --set-min.",
82
+ default=None,
83
+ )
84
+
61
85
  autoscale_parser.add_argument(
62
86
  "-d",
63
87
  "--soa-dir",
@@ -69,9 +93,46 @@ def add_subparser(subparsers):
69
93
  autoscale_parser.set_defaults(command=paasta_autoscale)
70
94
 
71
95
 
96
+ def parse_duration_to_seconds(duration: str) -> Optional[int]:
97
+ """Parse a duration string like '3h' or '30m' into seconds.
98
+
99
+ Args:
100
+ duration: A string representing a duration (e.g., "3h", "30m", "1d")
101
+
102
+ Returns:
103
+ The duration in seconds, or None if parsing failed
104
+ """
105
+ if not duration:
106
+ return None
107
+
108
+ seconds = timeparse(duration)
109
+ return seconds
110
+
111
+
72
112
  def paasta_autoscale(args):
73
113
  log.setLevel(logging.DEBUG)
74
114
  service = figure_out_service_name(args)
115
+
116
+ if args.set_min is not None and not args.duration:
117
+ print(
118
+ PaastaColors.yellow(
119
+ "WARNING: --set-min requires --for parameter to specify duration - defaulting to 30m"
120
+ )
121
+ )
122
+ args.duration = "30m"
123
+
124
+ if args.duration is not None and args.set_min is None:
125
+ print(PaastaColors.red("Error: --for requires --set-min parameter"))
126
+ return 1
127
+
128
+ if args.set is not None and args.set_min is not None:
129
+ print(
130
+ PaastaColors.red(
131
+ "Error: Cannot use both --set and --set-min at the same time"
132
+ )
133
+ )
134
+ return 1
135
+
75
136
  instance_config = next(
76
137
  get_instance_configs_for_service(
77
138
  service=service,
@@ -83,7 +144,7 @@ def paasta_autoscale(args):
83
144
  )
84
145
  if not instance_config:
85
146
  print(
86
- "Could not find config files for this service instance in soaconfigs. Maybe you mispelled an argument?"
147
+ "Could not find config files for this service instance in soaconfigs. Maybe you misspelled an argument?"
87
148
  )
88
149
  return 1
89
150
 
@@ -99,12 +160,15 @@ def paasta_autoscale(args):
99
160
  return 1
100
161
 
101
162
  try:
102
- if args.set is None:
163
+ # get current autoscaler count
164
+ if args.set is None and args.set_min is None:
103
165
  log.debug("Getting the current autoscaler count...")
104
166
  res, status, _ = api.autoscaler.get_autoscaler_count(
105
167
  service=service, instance=args.instance, _return_http_data_only=False
106
168
  )
107
- else:
169
+
170
+ # set desired instances
171
+ elif args.set is not None:
108
172
  log.debug(f"Setting desired instances to {args.set}.")
109
173
  msg = paastamodels.AutoscalerCountMsg(desired_instances=int(args.set))
110
174
  res, status, _ = api.autoscaler.update_autoscaler_count(
@@ -121,16 +185,53 @@ def paasta_autoscale(args):
121
185
  instance=args.instance,
122
186
  cluster=args.cluster,
123
187
  )
188
+
189
+ # set lower bound
190
+ elif args.set_min is not None:
191
+ duration_seconds = parse_duration_to_seconds(args.duration)
192
+ if not duration_seconds:
193
+ print(
194
+ PaastaColors.red(
195
+ f"Error: Invalid duration format '{args.duration}'. "
196
+ f"Please use a format like '3h' or '30m'."
197
+ )
198
+ )
199
+ return 1
200
+ # NOTE: this is explicitly using time.time() since we're doing everything using epoch time
201
+ # for simplicity
202
+ expiration_time = time.time() + duration_seconds
203
+
204
+ log.debug(
205
+ f"Setting minimum instances to {args.set_min} for duration {args.duration}."
206
+ )
207
+ msg = paastamodels.AutoscalingOverride(
208
+ min_instances=args.set_min,
209
+ expire_after=expiration_time,
210
+ )
211
+
212
+ res, status, _ = api.autoscaler.set_autoscaling_override(
213
+ service=service,
214
+ instance=args.instance,
215
+ autoscaling_override=msg,
216
+ _return_http_data_only=False,
217
+ )
218
+ _log_audit(
219
+ action="manual-autoscale-override",
220
+ action_details=str(msg),
221
+ service=service,
222
+ instance=args.instance,
223
+ cluster=args.cluster,
224
+ )
124
225
  except api.api_error as exc:
125
226
  status = exc.status
126
227
 
127
228
  if not 200 <= status <= 299:
128
229
  print(
129
230
  PaastaColors.red(
130
- f"ERROR: '{args.instance}' is not configured to autoscale, "
131
- f"so paasta autoscale could not scale it up on demand. "
231
+ f"ERROR: '{args.instance}' is not configured to autoscale OR you set min_instances above the current max_instances, "
232
+ f"and `paasta autoscale` could not update it. "
132
233
  f"If you want to be able to boost this service, please configure autoscaling for the service "
133
- f"in its config file by setting min and max instances. Example: \n"
234
+ f"in its config file by setting min and max instances appropriately. Example: \n"
134
235
  f"{args.instance}:\n"
135
236
  f" min_instances: 5\n"
136
237
  f" max_instances: 50"
@@ -139,5 +240,23 @@ def paasta_autoscale(args):
139
240
  return 0
140
241
 
141
242
  log.debug(f"Res: {res} Http: {status}")
142
- print(res.desired_instances)
243
+ if not args.set_min:
244
+ print(f"Desired instances: {res.desired_instances}")
245
+ elif args.set_min:
246
+ print(
247
+ f"Temporary override set for {args.service}.{args.instance} with minimum instances: {args.set_min}"
248
+ )
249
+ # folks using this might be in different timezones, so let's convert the expiration time to a few common ones
250
+ # to make it extra clear when the override will expire
251
+ epoch_time = datetime.fromtimestamp(res.expire_after)
252
+ eastern_time = epoch_time.astimezone(pytz.timezone("US/Eastern"))
253
+ pacific_time = epoch_time.astimezone(pytz.timezone("US/Pacific"))
254
+ london_time = epoch_time.astimezone(pytz.timezone("Europe/London"))
255
+
256
+ time_format = "%Y-%m-%d %H:%M:%S %Z%z"
257
+ print(f"The {args.duration} override will expire at:")
258
+ print(f"Eastern Time: {eastern_time.strftime(time_format)}")
259
+ print(f"Pacific Time: {pacific_time.strftime(time_format)}")
260
+ print(f"London Time: {london_time.strftime(time_format)}")
261
+
143
262
  return 0
@@ -14,6 +14,8 @@
14
14
  # limitations under the License.
15
15
  import argparse
16
16
  import shutil
17
+ import subprocess
18
+ import sys
17
19
  import time
18
20
  from typing import List
19
21
 
@@ -32,9 +34,12 @@ from paasta_tools.utils import load_system_paasta_config
32
34
  from paasta_tools.utils import SystemPaastaConfig
33
35
 
34
36
 
35
- KUBECTL_CMD_TEMPLATE = (
37
+ KUBECTL_EXEC_CMD_TEMPLATE = (
36
38
  "{kubectl_wrapper} --token {token} exec -it -n {namespace} {pod} -- /bin/bash"
37
39
  )
40
+ KUBECTL_CP_CMD_TEMPLATE = (
41
+ "{kubectl_wrapper} --token {token} -n {namespace} cp {filename} {pod}:/tmp/"
42
+ )
38
43
 
39
44
 
40
45
  def _list_services_and_toolboxes() -> List[str]:
@@ -55,7 +60,9 @@ def _list_services_and_toolboxes() -> List[str]:
55
60
  def paasta_remote_run_start(
56
61
  args: argparse.Namespace,
57
62
  system_paasta_config: SystemPaastaConfig,
63
+ recursed: bool = False,
58
64
  ) -> int:
65
+ status_prefix = "\x1b[2K\r" # Clear line, carriage return
59
66
  client = get_paasta_oapi_client_with_auth(
60
67
  cluster=get_paasta_oapi_api_clustername(cluster=args.cluster, is_eks=True),
61
68
  system_paasta_config=system_paasta_config,
@@ -95,10 +102,18 @@ def paasta_remote_run_start(
95
102
  if poll_response.status == 200:
96
103
  print("")
97
104
  break
98
- print(f"\rStatus: {poll_response.message}", end="")
105
+ print(f"{status_prefix}Status: {poll_response.message}", end="")
106
+ if poll_response.status == 404:
107
+ # Probably indicates a pod was terminating. Now that it's gone, retry the whole process
108
+ if not recursed:
109
+ print("\nPod finished terminating. Rerunning")
110
+ return paasta_remote_run_start(args, system_paasta_config, True)
111
+ else:
112
+ print("\nSomething went wrong. Pod still not found.")
113
+ return 1
99
114
  time.sleep(10)
100
115
  else:
101
- print("Timed out while waiting for job to start")
116
+ print(f"{status_prefix}Timed out while waiting for job to start")
102
117
  return 1
103
118
 
104
119
  if not args.interactive and not args.toolbox:
@@ -120,13 +135,28 @@ def paasta_remote_run_start(
120
135
  kubectl_wrapper = f"kubectl-eks-{args.cluster}"
121
136
  if not shutil.which(kubectl_wrapper):
122
137
  kubectl_wrapper = f"kubectl-{args.cluster}"
123
- exec_command = KUBECTL_CMD_TEMPLATE.format(
138
+ exec_command = KUBECTL_EXEC_CMD_TEMPLATE.format(
124
139
  kubectl_wrapper=kubectl_wrapper,
125
140
  namespace=poll_response.namespace,
126
141
  pod=poll_response.pod_name,
127
142
  token=token_response.token,
128
143
  )
129
144
 
145
+ if args.copy_file:
146
+ for filename in args.copy_file:
147
+ cp_command = KUBECTL_CP_CMD_TEMPLATE.format(
148
+ kubectl_wrapper=kubectl_wrapper,
149
+ namespace=poll_response.namespace,
150
+ pod=poll_response.pod_name,
151
+ filename=filename,
152
+ token=token_response.token,
153
+ ).split(" ")
154
+ call = subprocess.run(cp_command, capture_output=True)
155
+ if call.returncode != 0:
156
+ print("Error copying file to remote-run pod: ", file=sys.stderr)
157
+ print(call.stderr.decode("utf-8"), file=sys.stderr)
158
+ return 1
159
+
130
160
  run_interactive_cli(exec_command)
131
161
  return 0
132
162
 
@@ -232,6 +262,12 @@ def add_subparser(subparsers: argparse._SubParsersAction) -> None:
232
262
  type=int,
233
263
  default=600,
234
264
  )
265
+ start_parser.add_argument(
266
+ "--copy-file",
267
+ help="Adds a local file to /tmp inside the pod",
268
+ type=str,
269
+ action="append",
270
+ )
235
271
  stop_parser = subparsers.add_parser(
236
272
  "stop",
237
273
  help="Stop your remote-run job if it exists",
@@ -375,6 +375,20 @@ def add_subparser(subparsers):
375
375
  default=False,
376
376
  )
377
377
 
378
+ list_parser.add_argument(
379
+ "--jira-ticket",
380
+ help=(
381
+ "The top level jira ticket used to track the project that this spark-job is related to. "
382
+ "eg: --jira-ticket=PROJ-123. "
383
+ "Must be passed for all adhoc jobs. "
384
+ "See https://yelpwiki.yelpcorp.com/spaces/AML/pages/402885641. "
385
+ ),
386
+ type=str,
387
+ required=False,
388
+ dest="jira_ticket",
389
+ default=None,
390
+ )
391
+
378
392
  aws_group = list_parser.add_argument_group(
379
393
  title="AWS credentials options",
380
394
  description="If --aws-credentials-yaml is specified, it overrides all "
@@ -1383,6 +1397,7 @@ def paasta_spark_run(args: argparse.Namespace) -> int:
1383
1397
  use_eks=True,
1384
1398
  k8s_server_address=k8s_server_address,
1385
1399
  service_account_name=service_account_name,
1400
+ jira_ticket=args.jira_ticket,
1386
1401
  )
1387
1402
 
1388
1403
  return configure_and_run_docker_container(
@@ -18,6 +18,7 @@ from paasta_tools.kubernetes_tools import create_job
18
18
  from paasta_tools.kubernetes_tools import create_pod_disruption_budget
19
19
  from paasta_tools.kubernetes_tools import create_stateful_set
20
20
  from paasta_tools.kubernetes_tools import ensure_service_account
21
+ from paasta_tools.kubernetes_tools import HpaOverride
21
22
  from paasta_tools.kubernetes_tools import KubeClient
22
23
  from paasta_tools.kubernetes_tools import KubeDeployment
23
24
  from paasta_tools.kubernetes_tools import KubernetesDeploymentConfig
@@ -219,6 +220,15 @@ class Application(ABC):
219
220
 
220
221
 
221
222
  class DeploymentWrapper(Application):
223
+ def __init__(
224
+ self,
225
+ item: Union[V1Deployment, V1StatefulSet],
226
+ logging=logging.getLogger(__name__),
227
+ hpa_override: Optional[HpaOverride] = None,
228
+ ) -> None:
229
+ super().__init__(item, logging)
230
+ self.hpa_override = hpa_override
231
+
222
232
  def deep_delete(
223
233
  self, kube_client: KubeClient, propagation_policy="Foreground"
224
234
  ) -> None:
@@ -290,6 +300,9 @@ class DeploymentWrapper(Application):
290
300
  cluster=self.soa_config.cluster,
291
301
  kube_client=kube_client,
292
302
  namespace=self.item.metadata.namespace,
303
+ min_instances_override=(
304
+ self.hpa_override["min_instances"] if self.hpa_override else None
305
+ ),
293
306
  )
294
307
 
295
308
  hpa_exists = self.exists_hpa(kube_client)
@@ -461,11 +474,12 @@ class JobWrapper(Application):
461
474
 
462
475
 
463
476
  def get_application_wrapper(
464
- formatted_application: Union[V1Deployment, V1StatefulSet, V1Job]
477
+ formatted_application: Union[V1Deployment, V1StatefulSet, V1Job],
478
+ hpa_override: Optional[HpaOverride] = None,
465
479
  ) -> Application:
466
480
  app: Application
467
481
  if isinstance(formatted_application, V1Deployment):
468
- app = DeploymentWrapper(formatted_application)
482
+ app = DeploymentWrapper(formatted_application, hpa_override=hpa_override)
469
483
  elif isinstance(formatted_application, V1StatefulSet):
470
484
  app = StatefulSetWrapper(formatted_application)
471
485
  elif isinstance(formatted_application, V1Job):
@@ -189,6 +189,8 @@ def remote_run_ready(
189
189
  if not pod:
190
190
  return {"status": 404, "message": "No pod found"}
191
191
  if pod.status.phase == "Running":
192
+ if pod.metadata.deletion_timestamp:
193
+ return {"status": 409, "message": "Pod is terminating"}
192
194
  result: RemoteRunOutcome = {
193
195
  "status": 200,
194
196
  "message": "Pod ready",