paasta-tools 1.21.4__py3-none-any.whl → 1.23.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- paasta_tools/__init__.py +1 -1
- paasta_tools/api/api.py +5 -0
- paasta_tools/api/api_docs/swagger.json +95 -0
- paasta_tools/api/views/autoscaler.py +122 -0
- paasta_tools/cli/cmds/autoscale.py +126 -7
- paasta_tools/cli/cmds/remote_run.py +40 -4
- paasta_tools/cli/cmds/spark_run.py +15 -0
- paasta_tools/kubernetes/application/controller_wrappers.py +16 -2
- paasta_tools/kubernetes/remote_run.py +2 -0
- paasta_tools/kubernetes_tools.py +77 -1
- paasta_tools/paastaapi/api/autoscaler_api.py +140 -1
- paasta_tools/paastaapi/model/autoscaling_override.py +180 -0
- paasta_tools/paastaapi/model/inline_response202.py +182 -0
- paasta_tools/paastaapi/models/__init__.py +2 -0
- paasta_tools/setup_kubernetes_job.py +105 -1
- {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/setup_kubernetes_job.py +105 -1
- {paasta_tools-1.21.4.dist-info → paasta_tools-1.23.0.dist-info}/METADATA +2 -2
- {paasta_tools-1.21.4.dist-info → paasta_tools-1.23.0.dist-info}/RECORD +73 -71
- {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/apply_external_resources.py +0 -0
- {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/bounce_log_latency_parser.py +0 -0
- {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/check_autoscaler_max_instances.py +0 -0
- {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/check_cassandracluster_services_replication.py +0 -0
- {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/check_flink_services_health.py +0 -0
- {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/check_kubernetes_api.py +0 -0
- {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/check_kubernetes_services_replication.py +0 -0
- {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/check_manual_oapi_changes.sh +0 -0
- {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/check_oom_events.py +0 -0
- {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/check_orphans.py +0 -0
- {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/check_spark_jobs.py +0 -0
- {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/cleanup_kubernetes_cr.py +0 -0
- {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/cleanup_kubernetes_crd.py +0 -0
- {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/cleanup_kubernetes_jobs.py +0 -0
- {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/create_dynamodb_table.py +0 -0
- {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/create_paasta_playground.py +0 -0
- {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/delete_kubernetes_deployments.py +0 -0
- {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/emit_allocated_cpu_metrics.py +0 -0
- {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/generate_all_deployments +0 -0
- {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/generate_authenticating_services.py +0 -0
- {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/generate_deployments_for_service.py +0 -0
- {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/generate_services_file.py +0 -0
- {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/generate_services_yaml.py +0 -0
- {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/get_running_task_allocation.py +0 -0
- {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/habitat_fixer.py +0 -0
- {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/ide_helper.py +0 -0
- {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/is_pod_healthy_in_proxy.py +0 -0
- {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/is_pod_healthy_in_smartstack.py +0 -0
- {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/kill_bad_containers.py +0 -0
- {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/kubernetes_remove_evicted_pods.py +0 -0
- {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/mass-deploy-tag.sh +0 -0
- {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/mock_patch_checker.py +0 -0
- {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/paasta_cleanup_remote_run_resources.py +0 -0
- {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/paasta_cleanup_stale_nodes.py +0 -0
- {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/paasta_deploy_tron_jobs +0 -0
- {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/paasta_execute_docker_command.py +0 -0
- {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/paasta_secrets_sync.py +0 -0
- {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/paasta_tabcomplete.sh +0 -0
- {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/paasta_update_soa_memcpu.py +0 -0
- {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/render_template.py +0 -0
- {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/rightsizer_soaconfigs_update.py +0 -0
- {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/service_shard_remove.py +0 -0
- {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/service_shard_update.py +0 -0
- {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/setup_istio_mesh.py +0 -0
- {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/setup_kubernetes_cr.py +0 -0
- {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/setup_kubernetes_crd.py +0 -0
- {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/setup_kubernetes_internal_crd.py +0 -0
- {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/setup_prometheus_adapter_config.py +0 -0
- {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/shared_ip_check.py +0 -0
- {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/synapse_srv_namespaces_fact.py +0 -0
- {paasta_tools-1.21.4.data → paasta_tools-1.23.0.data}/scripts/timeouts_metrics_prom.py +0 -0
- {paasta_tools-1.21.4.dist-info → paasta_tools-1.23.0.dist-info}/LICENSE +0 -0
- {paasta_tools-1.21.4.dist-info → paasta_tools-1.23.0.dist-info}/WHEEL +0 -0
- {paasta_tools-1.21.4.dist-info → paasta_tools-1.23.0.dist-info}/entry_points.txt +0 -0
- {paasta_tools-1.21.4.dist-info → paasta_tools-1.23.0.dist-info}/top_level.txt +0 -0
paasta_tools/__init__.py
CHANGED
paasta_tools/api/api.py
CHANGED
|
@@ -189,6 +189,11 @@ def make_app(global_config=None):
|
|
|
189
189
|
"/v1/service_autoscaler/pause",
|
|
190
190
|
request_method="GET",
|
|
191
191
|
)
|
|
192
|
+
config.add_route(
|
|
193
|
+
"service_autoscaler.autoscaling_override.post",
|
|
194
|
+
"/v1/service_autoscaler/{service}/{instance}/autoscaling_override",
|
|
195
|
+
request_method="POST",
|
|
196
|
+
)
|
|
192
197
|
config.add_route(
|
|
193
198
|
"remote_run.start",
|
|
194
199
|
"/v1/remote_run/{service}/{instance}/start",
|
|
@@ -279,6 +279,79 @@
|
|
|
279
279
|
"operationId": "delete_service_autoscaler_pause"
|
|
280
280
|
}
|
|
281
281
|
},
|
|
282
|
+
"/service_autoscaler/{service}/{instance}/autoscaling_override": {
|
|
283
|
+
"post": {
|
|
284
|
+
"operationId": "set_autoscaling_override",
|
|
285
|
+
"parameters": [
|
|
286
|
+
{
|
|
287
|
+
"description": "Service name",
|
|
288
|
+
"in": "path",
|
|
289
|
+
"name": "service",
|
|
290
|
+
"required": true,
|
|
291
|
+
"type": "string"
|
|
292
|
+
},
|
|
293
|
+
{
|
|
294
|
+
"description": "Instance name",
|
|
295
|
+
"in": "path",
|
|
296
|
+
"name": "instance",
|
|
297
|
+
"required": true,
|
|
298
|
+
"type": "string"
|
|
299
|
+
},
|
|
300
|
+
{
|
|
301
|
+
"in": "body",
|
|
302
|
+
"name": "json_body",
|
|
303
|
+
"required": true,
|
|
304
|
+
"schema": {
|
|
305
|
+
"$ref": "#/definitions/AutoscalingOverride"
|
|
306
|
+
}
|
|
307
|
+
}
|
|
308
|
+
],
|
|
309
|
+
"responses": {
|
|
310
|
+
"202": {
|
|
311
|
+
"description": "Successfully set autoscaling override",
|
|
312
|
+
"schema": {
|
|
313
|
+
"type": "object",
|
|
314
|
+
"properties": {
|
|
315
|
+
"service": {
|
|
316
|
+
"type": "string",
|
|
317
|
+
"description": "Service name"
|
|
318
|
+
},
|
|
319
|
+
"instance": {
|
|
320
|
+
"type": "string",
|
|
321
|
+
"description": "Instance name"
|
|
322
|
+
},
|
|
323
|
+
"min_instances": {
|
|
324
|
+
"type": "integer",
|
|
325
|
+
"description": "Minimum number of instances to run"
|
|
326
|
+
},
|
|
327
|
+
"expire_after": {
|
|
328
|
+
"type": "number",
|
|
329
|
+
"format": "float",
|
|
330
|
+
"description": "Unix timestamp after which the override is no longer valid"
|
|
331
|
+
},
|
|
332
|
+
"status": {
|
|
333
|
+
"type": "string",
|
|
334
|
+
"description": "Status of the operation"
|
|
335
|
+
}
|
|
336
|
+
}
|
|
337
|
+
}
|
|
338
|
+
},
|
|
339
|
+
"400": {
|
|
340
|
+
"description": "Invalid request parameters"
|
|
341
|
+
},
|
|
342
|
+
"501": {
|
|
343
|
+
"description": "Autoscaling not supported for this instance type"
|
|
344
|
+
},
|
|
345
|
+
"500": {
|
|
346
|
+
"description": "Failed to set autoscaling override"
|
|
347
|
+
}
|
|
348
|
+
},
|
|
349
|
+
"summary": "Set a temporary autoscaling override for a service instance",
|
|
350
|
+
"tags": [
|
|
351
|
+
"autoscaler"
|
|
352
|
+
]
|
|
353
|
+
}
|
|
354
|
+
},
|
|
282
355
|
"/resources/utilization": {
|
|
283
356
|
"get": {
|
|
284
357
|
"responses": {
|
|
@@ -964,6 +1037,9 @@
|
|
|
964
1037
|
"404": {
|
|
965
1038
|
"description": "Service instance not found"
|
|
966
1039
|
},
|
|
1040
|
+
"409": {
|
|
1041
|
+
"description": "A pod was found but is currently being terminated"
|
|
1042
|
+
},
|
|
967
1043
|
"500": {
|
|
968
1044
|
"description": "Failure"
|
|
969
1045
|
}
|
|
@@ -1627,6 +1703,25 @@
|
|
|
1627
1703
|
}
|
|
1628
1704
|
}
|
|
1629
1705
|
},
|
|
1706
|
+
"AutoscalingOverride": {
|
|
1707
|
+
"type": "object",
|
|
1708
|
+
"properties": {
|
|
1709
|
+
"min_instances": {
|
|
1710
|
+
"type": "integer",
|
|
1711
|
+
"description": "Minimum number of instances to run",
|
|
1712
|
+
"minimum": 1
|
|
1713
|
+
},
|
|
1714
|
+
"expire_after": {
|
|
1715
|
+
"type": "number",
|
|
1716
|
+
"format": "float",
|
|
1717
|
+
"description": "Unix timestamp when this override is no longer valid"
|
|
1718
|
+
}
|
|
1719
|
+
},
|
|
1720
|
+
"required": [
|
|
1721
|
+
"min_instances",
|
|
1722
|
+
"expire_after"
|
|
1723
|
+
]
|
|
1724
|
+
},
|
|
1630
1725
|
"KubernetesReplicaSet": {
|
|
1631
1726
|
"type": "object",
|
|
1632
1727
|
"properties": {
|
|
@@ -15,13 +15,27 @@
|
|
|
15
15
|
"""
|
|
16
16
|
PaaSTA service list (instances) etc.
|
|
17
17
|
"""
|
|
18
|
+
import json
|
|
19
|
+
import logging
|
|
20
|
+
from datetime import datetime
|
|
21
|
+
from datetime import timezone
|
|
22
|
+
from typing import Tuple
|
|
23
|
+
|
|
24
|
+
from kubernetes.client import V1ConfigMap
|
|
18
25
|
from pyramid.response import Response
|
|
19
26
|
from pyramid.view import view_config
|
|
20
27
|
|
|
21
28
|
from paasta_tools.api import settings
|
|
22
29
|
from paasta_tools.api.views.exception import ApiFailure
|
|
23
30
|
from paasta_tools.cli.utils import get_instance_config
|
|
31
|
+
from paasta_tools.kubernetes_tools import AUTOSCALING_OVERRIDES_CONFIGMAP_NAME
|
|
32
|
+
from paasta_tools.kubernetes_tools import AUTOSCALING_OVERRIDES_CONFIGMAP_NAMESPACE
|
|
33
|
+
from paasta_tools.kubernetes_tools import get_or_create_namespaced_configmap
|
|
24
34
|
from paasta_tools.kubernetes_tools import KubernetesDeploymentConfig
|
|
35
|
+
from paasta_tools.kubernetes_tools import patch_namespaced_configmap
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
log = logging.getLogger(__name__)
|
|
25
39
|
|
|
26
40
|
|
|
27
41
|
@view_config(route_name="service.autoscaler.get", request_method="GET", renderer="json")
|
|
@@ -98,3 +112,111 @@ def update_autoscaler_count(request):
|
|
|
98
112
|
|
|
99
113
|
response_body = {"desired_instances": desired_instances, "status": status}
|
|
100
114
|
return Response(json_body=response_body, status_code=202)
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def get_or_create_autoscaling_overrides_configmap() -> Tuple[V1ConfigMap, bool]:
    """Look up the autoscaling-overrides ConfigMap, creating it if needed.

    Thin wrapper around ``get_or_create_namespaced_configmap`` that pins the
    ConfigMap name and namespace to the autoscaling-overrides constants and
    uses the API server's shared Kubernetes client.

    Returns:
        Whatever ``get_or_create_namespaced_configmap`` returns: a
        ``(configmap, flag)`` pair. The caller in this module treats the flag
        as "the ConfigMap was newly created".
    """
    lookup_kwargs = {
        "namespace": AUTOSCALING_OVERRIDES_CONFIGMAP_NAMESPACE,
        "kube_client": settings.kubernetes_client,
    }
    return get_or_create_namespaced_configmap(
        AUTOSCALING_OVERRIDES_CONFIGMAP_NAME,
        **lookup_kwargs,
    )
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
@view_config(
    route_name="service_autoscaler.autoscaling_override.post",
    request_method="POST",
    renderer="json",
)
def set_autoscaling_override(request):
    """Set a temporary autoscaling override for a service/instance.

    This endpoint creates or updates a ConfigMap entry with override information
    including expiration time. The override will be applied by the autoscaler.

    Required parameters (read from ``request.swagger_data``):
    - service: The service name
    - instance: The instance name
    - min_instances: The minimum number of instances to enforce
    - expire_after: unix timestamp after which the override is no longer valid

    Returns:
        A 202 ``Response`` whose JSON body echoes service, instance, cluster,
        min_instances and expire_after together with ``"status": "SUCCESS"``.

    Raises:
        ApiFailure: 501 when the instance is not a Kubernetes deployment;
            400 when min_instances is not a positive integer, when
            expire_after is missing, not numeric or not in the future, when
            autoscaling is not enabled for the instance, or when min_instances
            exceeds the configured max_instances.
    """
    service = request.swagger_data.get("service")
    instance = request.swagger_data.get("instance")
    cluster = settings.cluster
    soa_dir = settings.soa_dir

    instance_config = get_instance_config(
        service, instance, cluster, soa_dir, load_deployments=False
    )
    if not isinstance(instance_config, KubernetesDeploymentConfig):
        error_message = (
            f"Autoscaling is not supported for {service}.{instance} because instance type is not "
            f"kubernetes."
        )
        raise ApiFailure(error_message, 501)

    json_body = request.swagger_data.get("json_body", {})
    min_instances_override = json_body.get("min_instances")
    expire_after = json_body.get("expire_after")

    # bool is a subclass of int, so it has to be rejected explicitly; otherwise
    # `true` would be accepted as min_instances == 1.
    if (
        isinstance(min_instances_override, bool)
        or not isinstance(min_instances_override, int)
        or min_instances_override < 1
    ):
        raise ApiFailure("min_instances must be a positive integer", 400)

    if expire_after is None:
        raise ApiFailure("expire_after is required", 400)
    if isinstance(expire_after, bool) or not isinstance(expire_after, (int, float)):
        raise ApiFailure("expire_after must be a unix timestamp", 400)

    now = datetime.now(timezone.utc)
    # An override that has already expired would never be applied, so reporting
    # SUCCESS for it would be misleading. Written as `not >` so NaN is rejected too.
    if not expire_after > now.timestamp():
        raise ApiFailure("expire_after must be a unix timestamp in the future", 400)

    max_instances = instance_config.get_max_instances()
    if max_instances is None:
        raise ApiFailure(f"Autoscaling is not enabled for {service}.{instance}", 400)

    if max_instances < min_instances_override:
        raise ApiFailure(
            f"min_instances ({min_instances_override}) cannot be greater than max_instances ({max_instances})",
            400,
        )

    configmap, created = get_or_create_autoscaling_overrides_configmap()
    if created:
        log.info("Created new autoscaling overrides ConfigMap")
    # a newly created configmap has been observed to come back without a data
    # field even when one was passed to the create call, so normalise it here
    if not configmap.data:
        configmap.data = {}

    override_data = {
        "min_instances": min_instances_override,
        "created_at": now.isoformat(),
        # NOTE: we may want to also allow setting a max_instances override in the future, but if we do that
        # we'd probably want to force folks to either set one or both and share the same expiration time
        "expire_after": expire_after,
    }

    service_instance = f"{service}.{instance}"
    existing_overrides = {}
    raw_existing = configmap.data.get(service_instance)
    if raw_existing is not None:
        # A corrupt or unexpected entry must not make it impossible to set a new
        # override for this instance: log it and start from an empty mapping.
        try:
            parsed_existing = json.loads(raw_existing)
        except ValueError:
            log.warning(
                "Ignoring unparseable autoscaling override entry for %s",
                service_instance,
            )
        else:
            if isinstance(parsed_existing, dict):
                existing_overrides = parsed_existing
            else:
                log.warning(
                    "Ignoring non-object autoscaling override entry for %s",
                    service_instance,
                )
    merged_overrides = {**existing_overrides, **override_data}
    serialized_overrides = json.dumps(merged_overrides)

    patch_namespaced_configmap(
        name=AUTOSCALING_OVERRIDES_CONFIGMAP_NAME,
        namespace=AUTOSCALING_OVERRIDES_CONFIGMAP_NAMESPACE,
        # the patch body names only the $service.$instance key, so other
        # entries in the ConfigMap are expected to be left untouched
        body={"data": {service_instance: serialized_overrides}},
        kube_client=settings.kubernetes_client,
    )

    response_body = {
        "service": service,
        "instance": instance,
        "cluster": cluster,
        "min_instances": min_instances_override,
        "expire_after": expire_after,
        "status": "SUCCESS",
    }
    # NOTE: this is an HTTP 202 since actually updating the HPA happens asynchronously
    # through setup_kubernetes_job
    # XXX: should we try to patch things here as well?
    return Response(json_body=response_body, status_code=202)
|
|
@@ -13,6 +13,12 @@
|
|
|
13
13
|
# See the License for the specific language governing permissions and
|
|
14
14
|
# limitations under the License.
|
|
15
15
|
import logging
|
|
16
|
+
import time
|
|
17
|
+
from datetime import datetime
|
|
18
|
+
from typing import Optional
|
|
19
|
+
|
|
20
|
+
import pytz
|
|
21
|
+
from pytimeparse.timeparse import timeparse
|
|
16
22
|
|
|
17
23
|
import paasta_tools.paastaapi.models as paastamodels
|
|
18
24
|
from paasta_tools.api import client
|
|
@@ -58,6 +64,24 @@ def add_subparser(subparsers):
|
|
|
58
64
|
autoscale_parser.add_argument(
|
|
59
65
|
"--set", help="Set the number to scale to. Must be an Int.", type=int
|
|
60
66
|
)
|
|
67
|
+
|
|
68
|
+
# Temporary override options
|
|
69
|
+
override_group = autoscale_parser.add_argument_group("Temporary Override Options")
|
|
70
|
+
override_group.add_argument(
|
|
71
|
+
"--set-min",
|
|
72
|
+
help="Set the minimum number of replicas (must be >= 1). Requires --for parameter.",
|
|
73
|
+
type=lambda x: int(x)
|
|
74
|
+
if int(x) >= 1
|
|
75
|
+
else autoscale_parser.error("Minimum instances must be >= 1"),
|
|
76
|
+
default=None,
|
|
77
|
+
)
|
|
78
|
+
override_group.add_argument(
|
|
79
|
+
"--for",
|
|
80
|
+
dest="duration",
|
|
81
|
+
help="Duration for the temporary override (e.g. '3h', '30m'). Required when using --set-min.",
|
|
82
|
+
default=None,
|
|
83
|
+
)
|
|
84
|
+
|
|
61
85
|
autoscale_parser.add_argument(
|
|
62
86
|
"-d",
|
|
63
87
|
"--soa-dir",
|
|
@@ -69,9 +93,46 @@ def add_subparser(subparsers):
|
|
|
69
93
|
autoscale_parser.set_defaults(command=paasta_autoscale)
|
|
70
94
|
|
|
71
95
|
|
|
96
|
+
def parse_duration_to_seconds(duration: str) -> Optional[int]:
    """Parse a duration string like '3h' or '30m' into seconds.

    Args:
        duration: A string representing a duration (e.g., "3h", "30m", "1d")

    Returns:
        The duration in seconds, or None if the string is empty, could not be
        parsed, or does not describe a strictly positive amount of time.
    """
    if not duration:
        return None

    seconds = timeparse(duration)
    # timeparse yields None for input it cannot parse. A zero or negative
    # result (e.g. from a signed string such as "-1h") would put the override's
    # expiration at or before "now", so it is reported as invalid as well.
    if seconds is None or seconds <= 0:
        return None
    return seconds
|
|
110
|
+
|
|
111
|
+
|
|
72
112
|
def paasta_autoscale(args):
|
|
73
113
|
log.setLevel(logging.DEBUG)
|
|
74
114
|
service = figure_out_service_name(args)
|
|
115
|
+
|
|
116
|
+
if args.set_min is not None and not args.duration:
|
|
117
|
+
print(
|
|
118
|
+
PaastaColors.yellow(
|
|
119
|
+
"WARNING: --set-min requires --for parameter to specify duration - defaulting to 30m"
|
|
120
|
+
)
|
|
121
|
+
)
|
|
122
|
+
args.duration = "30m"
|
|
123
|
+
|
|
124
|
+
if args.duration is not None and args.set_min is None:
|
|
125
|
+
print(PaastaColors.red("Error: --for requires --set-min parameter"))
|
|
126
|
+
return 1
|
|
127
|
+
|
|
128
|
+
if args.set is not None and args.set_min is not None:
|
|
129
|
+
print(
|
|
130
|
+
PaastaColors.red(
|
|
131
|
+
"Error: Cannot use both --set and --set-min at the same time"
|
|
132
|
+
)
|
|
133
|
+
)
|
|
134
|
+
return 1
|
|
135
|
+
|
|
75
136
|
instance_config = next(
|
|
76
137
|
get_instance_configs_for_service(
|
|
77
138
|
service=service,
|
|
@@ -83,7 +144,7 @@ def paasta_autoscale(args):
|
|
|
83
144
|
)
|
|
84
145
|
if not instance_config:
|
|
85
146
|
print(
|
|
86
|
-
"Could not find config files for this service instance in soaconfigs. Maybe you
|
|
147
|
+
"Could not find config files for this service instance in soaconfigs. Maybe you misspelled an argument?"
|
|
87
148
|
)
|
|
88
149
|
return 1
|
|
89
150
|
|
|
@@ -99,12 +160,15 @@ def paasta_autoscale(args):
|
|
|
99
160
|
return 1
|
|
100
161
|
|
|
101
162
|
try:
|
|
102
|
-
|
|
163
|
+
# get current autoscaler count
|
|
164
|
+
if args.set is None and args.set_min is None:
|
|
103
165
|
log.debug("Getting the current autoscaler count...")
|
|
104
166
|
res, status, _ = api.autoscaler.get_autoscaler_count(
|
|
105
167
|
service=service, instance=args.instance, _return_http_data_only=False
|
|
106
168
|
)
|
|
107
|
-
|
|
169
|
+
|
|
170
|
+
# set desired instances
|
|
171
|
+
elif args.set is not None:
|
|
108
172
|
log.debug(f"Setting desired instances to {args.set}.")
|
|
109
173
|
msg = paastamodels.AutoscalerCountMsg(desired_instances=int(args.set))
|
|
110
174
|
res, status, _ = api.autoscaler.update_autoscaler_count(
|
|
@@ -121,16 +185,53 @@ def paasta_autoscale(args):
|
|
|
121
185
|
instance=args.instance,
|
|
122
186
|
cluster=args.cluster,
|
|
123
187
|
)
|
|
188
|
+
|
|
189
|
+
# set lower bound
|
|
190
|
+
elif args.set_min is not None:
|
|
191
|
+
duration_seconds = parse_duration_to_seconds(args.duration)
|
|
192
|
+
if not duration_seconds:
|
|
193
|
+
print(
|
|
194
|
+
PaastaColors.red(
|
|
195
|
+
f"Error: Invalid duration format '{args.duration}'. "
|
|
196
|
+
f"Please use a format like '3h' or '30m'."
|
|
197
|
+
)
|
|
198
|
+
)
|
|
199
|
+
return 1
|
|
200
|
+
# NOTE: this is explicitly using time.time() since we're doing everything using epoch time
|
|
201
|
+
# for simplicity
|
|
202
|
+
expiration_time = time.time() + duration_seconds
|
|
203
|
+
|
|
204
|
+
log.debug(
|
|
205
|
+
f"Setting minimum instances to {args.set_min} for duration {args.duration}."
|
|
206
|
+
)
|
|
207
|
+
msg = paastamodels.AutoscalingOverride(
|
|
208
|
+
min_instances=args.set_min,
|
|
209
|
+
expire_after=expiration_time,
|
|
210
|
+
)
|
|
211
|
+
|
|
212
|
+
res, status, _ = api.autoscaler.set_autoscaling_override(
|
|
213
|
+
service=service,
|
|
214
|
+
instance=args.instance,
|
|
215
|
+
autoscaling_override=msg,
|
|
216
|
+
_return_http_data_only=False,
|
|
217
|
+
)
|
|
218
|
+
_log_audit(
|
|
219
|
+
action="manual-autoscale-override",
|
|
220
|
+
action_details=str(msg),
|
|
221
|
+
service=service,
|
|
222
|
+
instance=args.instance,
|
|
223
|
+
cluster=args.cluster,
|
|
224
|
+
)
|
|
124
225
|
except api.api_error as exc:
|
|
125
226
|
status = exc.status
|
|
126
227
|
|
|
127
228
|
if not 200 <= status <= 299:
|
|
128
229
|
print(
|
|
129
230
|
PaastaColors.red(
|
|
130
|
-
f"ERROR: '{args.instance}' is not configured to autoscale, "
|
|
131
|
-
f"
|
|
231
|
+
f"ERROR: '{args.instance}' is not configured to autoscale OR you set min_instances above the current max_instances, "
|
|
232
|
+
f"and `paasta autoscale` could not update it. "
|
|
132
233
|
f"If you want to be able to boost this service, please configure autoscaling for the service "
|
|
133
|
-
f"in its config file by setting min and max instances. Example: \n"
|
|
234
|
+
f"in its config file by setting min and max instances appropriately. Example: \n"
|
|
134
235
|
f"{args.instance}:\n"
|
|
135
236
|
f" min_instances: 5\n"
|
|
136
237
|
f" max_instances: 50"
|
|
@@ -139,5 +240,23 @@ def paasta_autoscale(args):
|
|
|
139
240
|
return 0
|
|
140
241
|
|
|
141
242
|
log.debug(f"Res: {res} Http: {status}")
|
|
142
|
-
|
|
243
|
+
if not args.set_min:
|
|
244
|
+
print(f"Desired instances: {res.desired_instances}")
|
|
245
|
+
elif args.set_min:
|
|
246
|
+
print(
|
|
247
|
+
f"Temporary override set for {args.service}.{args.instance} with minimum instances: {args.set_min}"
|
|
248
|
+
)
|
|
249
|
+
# folks using this might be in different timezones, so let's convert the expiration time to a few common ones
|
|
250
|
+
# to make it extra clear when the override will expire
|
|
251
|
+
epoch_time = datetime.fromtimestamp(res.expire_after)
|
|
252
|
+
eastern_time = epoch_time.astimezone(pytz.timezone("US/Eastern"))
|
|
253
|
+
pacific_time = epoch_time.astimezone(pytz.timezone("US/Pacific"))
|
|
254
|
+
london_time = epoch_time.astimezone(pytz.timezone("Europe/London"))
|
|
255
|
+
|
|
256
|
+
time_format = "%Y-%m-%d %H:%M:%S %Z%z"
|
|
257
|
+
print(f"The {args.duration} override will expire at:")
|
|
258
|
+
print(f"Eastern Time: {eastern_time.strftime(time_format)}")
|
|
259
|
+
print(f"Pacific Time: {pacific_time.strftime(time_format)}")
|
|
260
|
+
print(f"London Time: {london_time.strftime(time_format)}")
|
|
261
|
+
|
|
143
262
|
return 0
|
|
@@ -14,6 +14,8 @@
|
|
|
14
14
|
# limitations under the License.
|
|
15
15
|
import argparse
|
|
16
16
|
import shutil
|
|
17
|
+
import subprocess
|
|
18
|
+
import sys
|
|
17
19
|
import time
|
|
18
20
|
from typing import List
|
|
19
21
|
|
|
@@ -32,9 +34,12 @@ from paasta_tools.utils import load_system_paasta_config
|
|
|
32
34
|
from paasta_tools.utils import SystemPaastaConfig
|
|
33
35
|
|
|
34
36
|
|
|
35
|
-
|
|
37
|
+
KUBECTL_EXEC_CMD_TEMPLATE = (
|
|
36
38
|
"{kubectl_wrapper} --token {token} exec -it -n {namespace} {pod} -- /bin/bash"
|
|
37
39
|
)
|
|
40
|
+
KUBECTL_CP_CMD_TEMPLATE = (
|
|
41
|
+
"{kubectl_wrapper} --token {token} -n {namespace} cp {filename} {pod}:/tmp/"
|
|
42
|
+
)
|
|
38
43
|
|
|
39
44
|
|
|
40
45
|
def _list_services_and_toolboxes() -> List[str]:
|
|
@@ -55,7 +60,9 @@ def _list_services_and_toolboxes() -> List[str]:
|
|
|
55
60
|
def paasta_remote_run_start(
|
|
56
61
|
args: argparse.Namespace,
|
|
57
62
|
system_paasta_config: SystemPaastaConfig,
|
|
63
|
+
recursed: bool = False,
|
|
58
64
|
) -> int:
|
|
65
|
+
status_prefix = "\x1b[2K\r" # Clear line, carriage return
|
|
59
66
|
client = get_paasta_oapi_client_with_auth(
|
|
60
67
|
cluster=get_paasta_oapi_api_clustername(cluster=args.cluster, is_eks=True),
|
|
61
68
|
system_paasta_config=system_paasta_config,
|
|
@@ -95,10 +102,18 @@ def paasta_remote_run_start(
|
|
|
95
102
|
if poll_response.status == 200:
|
|
96
103
|
print("")
|
|
97
104
|
break
|
|
98
|
-
print(f"
|
|
105
|
+
print(f"{status_prefix}Status: {poll_response.message}", end="")
|
|
106
|
+
if poll_response.status == 404:
|
|
107
|
+
# Probably indicates a pod was terminating. Now that its gone, retry the whole process
|
|
108
|
+
if not recursed:
|
|
109
|
+
print("\nPod finished terminating. Rerunning")
|
|
110
|
+
return paasta_remote_run_start(args, system_paasta_config, True)
|
|
111
|
+
else:
|
|
112
|
+
print("\nSomething went wrong. Pod still not found.")
|
|
113
|
+
return 1
|
|
99
114
|
time.sleep(10)
|
|
100
115
|
else:
|
|
101
|
-
print("Timed out while waiting for job to start")
|
|
116
|
+
print(f"{status_prefix}Timed out while waiting for job to start")
|
|
102
117
|
return 1
|
|
103
118
|
|
|
104
119
|
if not args.interactive and not args.toolbox:
|
|
@@ -120,13 +135,28 @@ def paasta_remote_run_start(
|
|
|
120
135
|
kubectl_wrapper = f"kubectl-eks-{args.cluster}"
|
|
121
136
|
if not shutil.which(kubectl_wrapper):
|
|
122
137
|
kubectl_wrapper = f"kubectl-{args.cluster}"
|
|
123
|
-
exec_command =
|
|
138
|
+
exec_command = KUBECTL_EXEC_CMD_TEMPLATE.format(
|
|
124
139
|
kubectl_wrapper=kubectl_wrapper,
|
|
125
140
|
namespace=poll_response.namespace,
|
|
126
141
|
pod=poll_response.pod_name,
|
|
127
142
|
token=token_response.token,
|
|
128
143
|
)
|
|
129
144
|
|
|
145
|
+
if args.copy_file:
|
|
146
|
+
for filename in args.copy_file:
|
|
147
|
+
cp_command = KUBECTL_CP_CMD_TEMPLATE.format(
|
|
148
|
+
kubectl_wrapper=kubectl_wrapper,
|
|
149
|
+
namespace=poll_response.namespace,
|
|
150
|
+
pod=poll_response.pod_name,
|
|
151
|
+
filename=filename,
|
|
152
|
+
token=token_response.token,
|
|
153
|
+
).split(" ")
|
|
154
|
+
call = subprocess.run(cp_command, capture_output=True)
|
|
155
|
+
if call.returncode != 0:
|
|
156
|
+
print("Error copying file to remote-run pod: ", file=sys.stderr)
|
|
157
|
+
print(call.stderr.decode("utf-8"), file=sys.stderr)
|
|
158
|
+
return 1
|
|
159
|
+
|
|
130
160
|
run_interactive_cli(exec_command)
|
|
131
161
|
return 0
|
|
132
162
|
|
|
@@ -232,6 +262,12 @@ def add_subparser(subparsers: argparse._SubParsersAction) -> None:
|
|
|
232
262
|
type=int,
|
|
233
263
|
default=600,
|
|
234
264
|
)
|
|
265
|
+
start_parser.add_argument(
|
|
266
|
+
"--copy-file",
|
|
267
|
+
help="Adds a local file to /tmp inside the pod",
|
|
268
|
+
type=str,
|
|
269
|
+
action="append",
|
|
270
|
+
)
|
|
235
271
|
stop_parser = subparsers.add_parser(
|
|
236
272
|
"stop",
|
|
237
273
|
help="Stop your remote-run job if it exists",
|
|
@@ -375,6 +375,20 @@ def add_subparser(subparsers):
|
|
|
375
375
|
default=False,
|
|
376
376
|
)
|
|
377
377
|
|
|
378
|
+
list_parser.add_argument(
|
|
379
|
+
"--jira-ticket",
|
|
380
|
+
help=(
|
|
381
|
+
"The top level jira ticket used to track the project that this spark-job is related to. "
|
|
382
|
+
"eg: --jira-ticket=PROJ-123. "
|
|
383
|
+
"Must be passed for all adhoc jobs. "
|
|
384
|
+
"See https://yelpwiki.yelpcorp.com/spaces/AML/pages/402885641. "
|
|
385
|
+
),
|
|
386
|
+
type=str,
|
|
387
|
+
required=False,
|
|
388
|
+
dest="jira_ticket",
|
|
389
|
+
default=None,
|
|
390
|
+
)
|
|
391
|
+
|
|
378
392
|
aws_group = list_parser.add_argument_group(
|
|
379
393
|
title="AWS credentials options",
|
|
380
394
|
description="If --aws-credentials-yaml is specified, it overrides all "
|
|
@@ -1383,6 +1397,7 @@ def paasta_spark_run(args: argparse.Namespace) -> int:
|
|
|
1383
1397
|
use_eks=True,
|
|
1384
1398
|
k8s_server_address=k8s_server_address,
|
|
1385
1399
|
service_account_name=service_account_name,
|
|
1400
|
+
jira_ticket=args.jira_ticket,
|
|
1386
1401
|
)
|
|
1387
1402
|
|
|
1388
1403
|
return configure_and_run_docker_container(
|
|
@@ -18,6 +18,7 @@ from paasta_tools.kubernetes_tools import create_job
|
|
|
18
18
|
from paasta_tools.kubernetes_tools import create_pod_disruption_budget
|
|
19
19
|
from paasta_tools.kubernetes_tools import create_stateful_set
|
|
20
20
|
from paasta_tools.kubernetes_tools import ensure_service_account
|
|
21
|
+
from paasta_tools.kubernetes_tools import HpaOverride
|
|
21
22
|
from paasta_tools.kubernetes_tools import KubeClient
|
|
22
23
|
from paasta_tools.kubernetes_tools import KubeDeployment
|
|
23
24
|
from paasta_tools.kubernetes_tools import KubernetesDeploymentConfig
|
|
@@ -219,6 +220,15 @@ class Application(ABC):
|
|
|
219
220
|
|
|
220
221
|
|
|
221
222
|
class DeploymentWrapper(Application):
|
|
223
|
+
def __init__(
    self,
    item: Union[V1Deployment, V1StatefulSet],
    logging=logging.getLogger(__name__),
    hpa_override: Optional[HpaOverride] = None,
) -> None:
    """Wrap a deployment object, optionally carrying an HPA override.

    Args:
        item: The Kubernetes object being wrapped; forwarded to the base
            class initializer.
        logging: Logger forwarded to the base class initializer. The name
            shadows the ``logging`` module inside this method only.
        hpa_override: Optional override mapping stored on the instance;
            ``None`` means no override is in effect.
    """
    # Base-class setup runs first, then the override is attached.
    super().__init__(item, logging)
    self.hpa_override = hpa_override
|
|
231
|
+
|
|
222
232
|
def deep_delete(
|
|
223
233
|
self, kube_client: KubeClient, propagation_policy="Foreground"
|
|
224
234
|
) -> None:
|
|
@@ -290,6 +300,9 @@ class DeploymentWrapper(Application):
|
|
|
290
300
|
cluster=self.soa_config.cluster,
|
|
291
301
|
kube_client=kube_client,
|
|
292
302
|
namespace=self.item.metadata.namespace,
|
|
303
|
+
min_instances_override=(
|
|
304
|
+
self.hpa_override["min_instances"] if self.hpa_override else None
|
|
305
|
+
),
|
|
293
306
|
)
|
|
294
307
|
|
|
295
308
|
hpa_exists = self.exists_hpa(kube_client)
|
|
@@ -461,11 +474,12 @@ class JobWrapper(Application):
|
|
|
461
474
|
|
|
462
475
|
|
|
463
476
|
def get_application_wrapper(
|
|
464
|
-
formatted_application: Union[V1Deployment, V1StatefulSet, V1Job]
|
|
477
|
+
formatted_application: Union[V1Deployment, V1StatefulSet, V1Job],
|
|
478
|
+
hpa_override: Optional[HpaOverride] = None,
|
|
465
479
|
) -> Application:
|
|
466
480
|
app: Application
|
|
467
481
|
if isinstance(formatted_application, V1Deployment):
|
|
468
|
-
app = DeploymentWrapper(formatted_application)
|
|
482
|
+
app = DeploymentWrapper(formatted_application, hpa_override=hpa_override)
|
|
469
483
|
elif isinstance(formatted_application, V1StatefulSet):
|
|
470
484
|
app = StatefulSetWrapper(formatted_application)
|
|
471
485
|
elif isinstance(formatted_application, V1Job):
|
|
@@ -189,6 +189,8 @@ def remote_run_ready(
|
|
|
189
189
|
if not pod:
|
|
190
190
|
return {"status": 404, "message": "No pod found"}
|
|
191
191
|
if pod.status.phase == "Running":
|
|
192
|
+
if pod.metadata.deletion_timestamp:
|
|
193
|
+
return {"status": 409, "message": "Pod is terminating"}
|
|
192
194
|
result: RemoteRunOutcome = {
|
|
193
195
|
"status": 200,
|
|
194
196
|
"message": "Pod ready",
|