paasta-tools 1.21.3__py3-none-any.whl → 1.22.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- paasta_tools/__init__.py +1 -1
- paasta_tools/api/api.py +5 -0
- paasta_tools/api/api_docs/swagger.json +92 -0
- paasta_tools/api/views/autoscaler.py +122 -0
- paasta_tools/cli/authentication.py +6 -39
- paasta_tools/cli/cmds/autoscale.py +126 -7
- paasta_tools/cli/cmds/logs.py +7 -7
- paasta_tools/cli/schemas/service_schema.json +4 -1
- paasta_tools/generate_deployments_for_service.py +4 -0
- paasta_tools/kubernetes/application/controller_wrappers.py +16 -2
- paasta_tools/kubernetes_tools.py +77 -1
- paasta_tools/paastaapi/api/autoscaler_api.py +140 -1
- paasta_tools/paastaapi/model/autoscaling_override.py +180 -0
- paasta_tools/paastaapi/model/inline_response202.py +182 -0
- paasta_tools/paastaapi/models/__init__.py +2 -0
- paasta_tools/setup_istio_mesh.py +1 -1
- paasta_tools/setup_kubernetes_job.py +105 -1
- paasta_tools/smartstack_tools.py +2 -2
- paasta_tools/spark_tools.py +2 -2
- {paasta_tools-1.21.3.data → paasta_tools-1.22.0.data}/scripts/generate_deployments_for_service.py +4 -0
- {paasta_tools-1.21.3.data → paasta_tools-1.22.0.data}/scripts/setup_istio_mesh.py +1 -1
- {paasta_tools-1.21.3.data → paasta_tools-1.22.0.data}/scripts/setup_kubernetes_job.py +105 -1
- {paasta_tools-1.21.3.dist-info → paasta_tools-1.22.0.dist-info}/METADATA +2 -2
- {paasta_tools-1.21.3.dist-info → paasta_tools-1.22.0.dist-info}/RECORD +77 -75
- {paasta_tools-1.21.3.data → paasta_tools-1.22.0.data}/scripts/apply_external_resources.py +0 -0
- {paasta_tools-1.21.3.data → paasta_tools-1.22.0.data}/scripts/bounce_log_latency_parser.py +0 -0
- {paasta_tools-1.21.3.data → paasta_tools-1.22.0.data}/scripts/check_autoscaler_max_instances.py +0 -0
- {paasta_tools-1.21.3.data → paasta_tools-1.22.0.data}/scripts/check_cassandracluster_services_replication.py +0 -0
- {paasta_tools-1.21.3.data → paasta_tools-1.22.0.data}/scripts/check_flink_services_health.py +0 -0
- {paasta_tools-1.21.3.data → paasta_tools-1.22.0.data}/scripts/check_kubernetes_api.py +0 -0
- {paasta_tools-1.21.3.data → paasta_tools-1.22.0.data}/scripts/check_kubernetes_services_replication.py +0 -0
- {paasta_tools-1.21.3.data → paasta_tools-1.22.0.data}/scripts/check_manual_oapi_changes.sh +0 -0
- {paasta_tools-1.21.3.data → paasta_tools-1.22.0.data}/scripts/check_oom_events.py +0 -0
- {paasta_tools-1.21.3.data → paasta_tools-1.22.0.data}/scripts/check_orphans.py +0 -0
- {paasta_tools-1.21.3.data → paasta_tools-1.22.0.data}/scripts/check_spark_jobs.py +0 -0
- {paasta_tools-1.21.3.data → paasta_tools-1.22.0.data}/scripts/cleanup_kubernetes_cr.py +0 -0
- {paasta_tools-1.21.3.data → paasta_tools-1.22.0.data}/scripts/cleanup_kubernetes_crd.py +0 -0
- {paasta_tools-1.21.3.data → paasta_tools-1.22.0.data}/scripts/cleanup_kubernetes_jobs.py +0 -0
- {paasta_tools-1.21.3.data → paasta_tools-1.22.0.data}/scripts/create_dynamodb_table.py +0 -0
- {paasta_tools-1.21.3.data → paasta_tools-1.22.0.data}/scripts/create_paasta_playground.py +0 -0
- {paasta_tools-1.21.3.data → paasta_tools-1.22.0.data}/scripts/delete_kubernetes_deployments.py +0 -0
- {paasta_tools-1.21.3.data → paasta_tools-1.22.0.data}/scripts/emit_allocated_cpu_metrics.py +0 -0
- {paasta_tools-1.21.3.data → paasta_tools-1.22.0.data}/scripts/generate_all_deployments +0 -0
- {paasta_tools-1.21.3.data → paasta_tools-1.22.0.data}/scripts/generate_authenticating_services.py +0 -0
- {paasta_tools-1.21.3.data → paasta_tools-1.22.0.data}/scripts/generate_services_file.py +0 -0
- {paasta_tools-1.21.3.data → paasta_tools-1.22.0.data}/scripts/generate_services_yaml.py +0 -0
- {paasta_tools-1.21.3.data → paasta_tools-1.22.0.data}/scripts/get_running_task_allocation.py +0 -0
- {paasta_tools-1.21.3.data → paasta_tools-1.22.0.data}/scripts/habitat_fixer.py +0 -0
- {paasta_tools-1.21.3.data → paasta_tools-1.22.0.data}/scripts/ide_helper.py +0 -0
- {paasta_tools-1.21.3.data → paasta_tools-1.22.0.data}/scripts/is_pod_healthy_in_proxy.py +0 -0
- {paasta_tools-1.21.3.data → paasta_tools-1.22.0.data}/scripts/is_pod_healthy_in_smartstack.py +0 -0
- {paasta_tools-1.21.3.data → paasta_tools-1.22.0.data}/scripts/kill_bad_containers.py +0 -0
- {paasta_tools-1.21.3.data → paasta_tools-1.22.0.data}/scripts/kubernetes_remove_evicted_pods.py +0 -0
- {paasta_tools-1.21.3.data → paasta_tools-1.22.0.data}/scripts/mass-deploy-tag.sh +0 -0
- {paasta_tools-1.21.3.data → paasta_tools-1.22.0.data}/scripts/mock_patch_checker.py +0 -0
- {paasta_tools-1.21.3.data → paasta_tools-1.22.0.data}/scripts/paasta_cleanup_remote_run_resources.py +0 -0
- {paasta_tools-1.21.3.data → paasta_tools-1.22.0.data}/scripts/paasta_cleanup_stale_nodes.py +0 -0
- {paasta_tools-1.21.3.data → paasta_tools-1.22.0.data}/scripts/paasta_deploy_tron_jobs +0 -0
- {paasta_tools-1.21.3.data → paasta_tools-1.22.0.data}/scripts/paasta_execute_docker_command.py +0 -0
- {paasta_tools-1.21.3.data → paasta_tools-1.22.0.data}/scripts/paasta_secrets_sync.py +0 -0
- {paasta_tools-1.21.3.data → paasta_tools-1.22.0.data}/scripts/paasta_tabcomplete.sh +0 -0
- {paasta_tools-1.21.3.data → paasta_tools-1.22.0.data}/scripts/paasta_update_soa_memcpu.py +0 -0
- {paasta_tools-1.21.3.data → paasta_tools-1.22.0.data}/scripts/render_template.py +0 -0
- {paasta_tools-1.21.3.data → paasta_tools-1.22.0.data}/scripts/rightsizer_soaconfigs_update.py +0 -0
- {paasta_tools-1.21.3.data → paasta_tools-1.22.0.data}/scripts/service_shard_remove.py +0 -0
- {paasta_tools-1.21.3.data → paasta_tools-1.22.0.data}/scripts/service_shard_update.py +0 -0
- {paasta_tools-1.21.3.data → paasta_tools-1.22.0.data}/scripts/setup_kubernetes_cr.py +0 -0
- {paasta_tools-1.21.3.data → paasta_tools-1.22.0.data}/scripts/setup_kubernetes_crd.py +0 -0
- {paasta_tools-1.21.3.data → paasta_tools-1.22.0.data}/scripts/setup_kubernetes_internal_crd.py +0 -0
- {paasta_tools-1.21.3.data → paasta_tools-1.22.0.data}/scripts/setup_prometheus_adapter_config.py +0 -0
- {paasta_tools-1.21.3.data → paasta_tools-1.22.0.data}/scripts/shared_ip_check.py +0 -0
- {paasta_tools-1.21.3.data → paasta_tools-1.22.0.data}/scripts/synapse_srv_namespaces_fact.py +0 -0
- {paasta_tools-1.21.3.data → paasta_tools-1.22.0.data}/scripts/timeouts_metrics_prom.py +0 -0
- {paasta_tools-1.21.3.dist-info → paasta_tools-1.22.0.dist-info}/LICENSE +0 -0
- {paasta_tools-1.21.3.dist-info → paasta_tools-1.22.0.dist-info}/WHEEL +0 -0
- {paasta_tools-1.21.3.dist-info → paasta_tools-1.22.0.dist-info}/entry_points.txt +0 -0
- {paasta_tools-1.21.3.dist-info → paasta_tools-1.22.0.dist-info}/top_level.txt +0 -0
paasta_tools/__init__.py
CHANGED
paasta_tools/api/api.py
CHANGED
@@ -189,6 +189,11 @@ def make_app(global_config=None):
         "/v1/service_autoscaler/pause",
         request_method="GET",
     )
+    config.add_route(
+        "service_autoscaler.autoscaling_override.post",
+        "/v1/service_autoscaler/{service}/{instance}/autoscaling_override",
+        request_method="POST",
+    )
     config.add_route(
         "remote_run.start",
         "/v1/remote_run/{service}/{instance}/start",

paasta_tools/api/api_docs/swagger.json
CHANGED

@@ -279,6 +279,79 @@
                 "operationId": "delete_service_autoscaler_pause"
             }
         },
+        "/service_autoscaler/{service}/{instance}/autoscaling_override": {
+            "post": {
+                "operationId": "set_autoscaling_override",
+                "parameters": [
+                    {
+                        "description": "Service name",
+                        "in": "path",
+                        "name": "service",
+                        "required": true,
+                        "type": "string"
+                    },
+                    {
+                        "description": "Instance name",
+                        "in": "path",
+                        "name": "instance",
+                        "required": true,
+                        "type": "string"
+                    },
+                    {
+                        "in": "body",
+                        "name": "json_body",
+                        "required": true,
+                        "schema": {
+                            "$ref": "#/definitions/AutoscalingOverride"
+                        }
+                    }
+                ],
+                "responses": {
+                    "202": {
+                        "description": "Successfully set autoscaling override",
+                        "schema": {
+                            "type": "object",
+                            "properties": {
+                                "service": {
+                                    "type": "string",
+                                    "description": "Service name"
+                                },
+                                "instance": {
+                                    "type": "string",
+                                    "description": "Instance name"
+                                },
+                                "min_instances": {
+                                    "type": "integer",
+                                    "description": "Minimum number of instances to run"
+                                },
+                                "expire_after": {
+                                    "type": "number",
+                                    "format": "float",
+                                    "description": "Unix timestamp after which the override is no longer valid"
+                                },
+                                "status": {
+                                    "type": "string",
+                                    "description": "Status of the operation"
+                                }
+                            }
+                        }
+                    },
+                    "400": {
+                        "description": "Invalid request parameters"
+                    },
+                    "501": {
+                        "description": "Autoscaling not supported for this instance type"
+                    },
+                    "500": {
+                        "description": "Failed to set autoscaling override"
+                    }
+                },
+                "summary": "Set a temporary autoscaling override for a service instance",
+                "tags": [
+                    "autoscaler"
+                ]
+            }
+        },
         "/resources/utilization": {
             "get": {
                 "responses": {

@@ -1627,6 +1700,25 @@
                 }
             }
         },
+        "AutoscalingOverride": {
+            "type": "object",
+            "properties": {
+                "min_instances": {
+                    "type": "integer",
+                    "description": "Minimum number of instances to run",
+                    "minimum": 1
+                },
+                "expire_after": {
+                    "type": "number",
+                    "format": "float",
+                    "description": "Unix timestamp when this override is no longer valid"
+                }
+            },
+            "required": [
+                "min_instances",
+                "expire_after"
+            ]
+        },
         "KubernetesReplicaSet": {
             "type": "object",
             "properties": {
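
For orientation, a minimal sketch of exercising the new endpoint through the generated OpenAPI client. The client-construction helper, cluster, service, and instance names here are assumptions for illustration; only the AutoscalingOverride model and the set_autoscaling_override call come from this release:

import time

import paasta_tools.paastaapi.models as paastamodels
from paasta_tools.api import client

# Assumed: the usual helper for building a client against a cluster's PaaSTA API.
api = client.get_paasta_oapi_client(cluster="example-cluster")

override = paastamodels.AutoscalingOverride(
    min_instances=5,
    expire_after=time.time() + 3 * 3600,  # epoch seconds: expire three hours from now
)
result = api.autoscaler.set_autoscaling_override(
    service="example-service",
    instance="main",
    autoscaling_override=override,
)
# The 202 body echoes service, instance, min_instances, expire_after and status.
print(result.status)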

paasta_tools/api/views/autoscaler.py
CHANGED

@@ -15,13 +15,27 @@
 """
 PaaSTA service list (instances) etc.
 """
+import json
+import logging
+from datetime import datetime
+from datetime import timezone
+from typing import Tuple
+
+from kubernetes.client import V1ConfigMap
 from pyramid.response import Response
 from pyramid.view import view_config
 
 from paasta_tools.api import settings
 from paasta_tools.api.views.exception import ApiFailure
 from paasta_tools.cli.utils import get_instance_config
+from paasta_tools.kubernetes_tools import AUTOSCALING_OVERRIDES_CONFIGMAP_NAME
+from paasta_tools.kubernetes_tools import AUTOSCALING_OVERRIDES_CONFIGMAP_NAMESPACE
+from paasta_tools.kubernetes_tools import get_or_create_namespaced_configmap
 from paasta_tools.kubernetes_tools import KubernetesDeploymentConfig
+from paasta_tools.kubernetes_tools import patch_namespaced_configmap
+
+
+log = logging.getLogger(__name__)
 
 
 @view_config(route_name="service.autoscaler.get", request_method="GET", renderer="json")

@@ -98,3 +112,111 @@ def update_autoscaler_count(request):
 
     response_body = {"desired_instances": desired_instances, "status": status}
     return Response(json_body=response_body, status_code=202)
+
+
+def get_or_create_autoscaling_overrides_configmap() -> Tuple[V1ConfigMap, bool]:
+    return get_or_create_namespaced_configmap(
+        AUTOSCALING_OVERRIDES_CONFIGMAP_NAME,
+        namespace=AUTOSCALING_OVERRIDES_CONFIGMAP_NAMESPACE,
+        kube_client=settings.kubernetes_client,
+    )
+
+
+@view_config(
+    route_name="service_autoscaler.autoscaling_override.post",
+    request_method="POST",
+    renderer="json",
+)
+def set_autoscaling_override(request):
+    """Set a temporary autoscaling override for a service/instance.
+
+    This endpoint creates or updates a ConfigMap entry with override information
+    including expiration time. The override will be applied by the autoscaler.
+
+    Required parameters:
+    - service: The service name
+    - instance: The instance name
+    - min_instances: The minimum number of instances to enforce
+    - expires_after: unix timestamp after which the override is no longer valid
+    """
+    service = request.swagger_data.get("service")
+    instance = request.swagger_data.get("instance")
+    cluster = settings.cluster
+    soa_dir = settings.soa_dir
+
+    instance_config = get_instance_config(
+        service, instance, cluster, soa_dir, load_deployments=False
+    )
+    if not isinstance(instance_config, KubernetesDeploymentConfig):
+        error_message = (
+            f"Autoscaling is not supported for {service}.{instance} because instance type is not "
+            f"kubernetes."
+        )
+        raise ApiFailure(error_message, 501)
+
+    json_body = request.swagger_data.get("json_body", {})
+    min_instances_override = json_body.get("min_instances")
+    expire_after = json_body.get("expire_after")
+
+    if not isinstance(min_instances_override, int) or min_instances_override < 1:
+        raise ApiFailure("min_instances must be a positive integer", 400)
+
+    if not expire_after:
+        raise ApiFailure("expire_after is required", 400)
+
+    max_instances = instance_config.get_max_instances()
+    if max_instances is None:
+        raise ApiFailure(f"Autoscaling is not enabled for {service}.{instance}", 400)
+
+    if max_instances < min_instances_override:
+        raise ApiFailure(
+            f"min_instances ({min_instances_override}) cannot be greater than max_instances ({max_instances})",
+            400,
+        )
+
+    configmap, created = get_or_create_autoscaling_overrides_configmap()
+    if created:
+        log.info("Created new autoscaling overrides ConfigMap")
+    # i dunno why this is necessary, but a newly created configmap doesn't have a data field
+    # even when we set it in the create call
+    if not configmap.data:
+        configmap.data = {}
+
+    override_data = {
+        "min_instances": min_instances_override,
+        "created_at": datetime.now(timezone.utc).isoformat(),
+        # NOTE: we may want to also allow setting a max_instances override in the future, but if we do that
+        # we'd probably want to force folks to either set one or both and share the same expiration time
+        "expire_after": expire_after,
+    }
+
+    service_instance = f"{service}.{instance}"
+    existing_overrides = (
+        json.loads(configmap.data[service_instance])
+        if service_instance in configmap.data
+        else {}
+    )
+    merged_overrides = {**existing_overrides, **override_data}
+    serialized_overrides = json.dumps(merged_overrides)
+
+    patch_namespaced_configmap(
+        name=AUTOSCALING_OVERRIDES_CONFIGMAP_NAME,
+        namespace=AUTOSCALING_OVERRIDES_CONFIGMAP_NAMESPACE,
+        # this should only update the single entry for the $service.$instance key
+        # ain't k8s grand?
+        body={"data": {service_instance: serialized_overrides}},
+        kube_client=settings.kubernetes_client,
+    )
+
+    response_body = {
+        "service": service,
+        "instance": instance,
+        "cluster": cluster,
+        "min_instances": min_instances_override,
+        "expire_after": expire_after,
+        "status": "SUCCESS",
+    }
+    # NOTE: this is an HTTP 202 since actually updating the HPA happens asynchronously
+    # through setup_kubernetes_job
+    # XXX: should we try to patch things here as well?
+    return Response(json_body=response_body, status_code=202)
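
The handler's net effect is one JSON-encoded entry per service.instance key in the overrides ConfigMap; the ConfigMap's actual name and namespace come from constants added to kubernetes_tools.py, which this section does not show. A rough sketch of the stored shape, with illustrative values:

import json

# Illustrative only: created_at comes from datetime.now(timezone.utc).isoformat(),
# expire_after is the caller-supplied epoch timestamp.
configmap_data = {
    "example-service.main": json.dumps(
        {
            "min_instances": 5,
            "created_at": "2024-01-01T00:00:00+00:00",
            "expire_after": 1704078000.0,
        }
    )
}
print(configmap_data["example-service.main"])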

paasta_tools/cli/authentication.py
CHANGED

@@ -12,63 +12,30 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from functools import lru_cache
-
-from botocore.credentials import InstanceMetadataFetcher
-from botocore.credentials import InstanceMetadataProvider
+from typing import Optional
 
 from paasta_tools.utils import load_system_paasta_config
 
 
 try:
-    from vault_tools.
-    from vault_tools.paasta_secret import get_vault_url
-    from vault_tools.paasta_secret import get_vault_ca
+    from vault_tools.oidc import get_instance_oidc_identity_token
     from okta_auth import get_and_cache_jwt_default
 except ImportError:
 
-    def
-
-
-    def get_vault_url(ecosystem: str) -> str:
-        return ""
-
-    def get_vault_ca(ecosystem: str) -> str:
+    def get_instance_oidc_identity_token(
+        role: str, ecosystem: Optional[str] = None
+    ) -> str:
         return ""
 
     def get_and_cache_jwt_default(client_id: str) -> str:
         return ""
 
 
-    def get_current_ecosystem() -> str:
-        """Get current ecosystem from host configs, defaults to dev if no config is found"""
-        try:
-            with open("/nail/etc/ecosystem") as f:
-                return f.read().strip()
-        except IOError:
-            pass
-        return "devc"
-
-
 @lru_cache(maxsize=1)
 def get_service_auth_token() -> str:
     """Uses instance profile to authenticate with Vault and generate token for service authentication"""
-    ecosystem = get_current_ecosystem()
-    vault_client = get_vault_client(get_vault_url(ecosystem), get_vault_ca(ecosystem))
     vault_role = load_system_paasta_config().get_service_auth_vault_role()
-
-        iam_role_fetcher=InstanceMetadataFetcher(),
-    )
-    instance_credentials = metadata_provider.load().get_frozen_credentials()
-    vault_client.auth.aws.iam_login(
-        instance_credentials.access_key,
-        instance_credentials.secret_key,
-        instance_credentials.token,
-        mount_point="aws-iam",
-        role=vault_role,
-        use_token=True,
-    )
-    response = vault_client.secrets.identity.generate_signed_id_token(name=vault_role)
-    return response["data"]["token"]
+    return get_instance_oidc_identity_token(vault_role)
 
 
 def get_sso_auth_token(paasta_apis: bool = False) -> str:
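
get_service_auth_token() now just resolves the configured Vault role and asks vault_tools for an OIDC identity token, memoized for the process lifetime by lru_cache(maxsize=1). A minimal sketch of a call site, assuming the token is sent as a bearer credential (that header usage is an assumption, not shown in this diff):

from paasta_tools.cli.authentication import get_service_auth_token

token = get_service_auth_token()  # cached after the first call
headers = {"Authorization": f"Bearer {token}"}  # assumed usage, for illustration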

paasta_tools/cli/cmds/autoscale.py
CHANGED

@@ -13,6 +13,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import logging
+import time
+from datetime import datetime
+from typing import Optional
+
+import pytz
+from pytimeparse.timeparse import timeparse
 
 import paasta_tools.paastaapi.models as paastamodels
 from paasta_tools.api import client

@@ -58,6 +64,24 @@ def add_subparser(subparsers):
     autoscale_parser.add_argument(
         "--set", help="Set the number to scale to. Must be an Int.", type=int
     )
+
+    # Temporary override options
+    override_group = autoscale_parser.add_argument_group("Temporary Override Options")
+    override_group.add_argument(
+        "--set-min",
+        help="Set the minimum number of replicas (must be >= 1). Requires --for parameter.",
+        type=lambda x: int(x)
+        if int(x) >= 1
+        else autoscale_parser.error("Minimum instances must be >= 1"),
+        default=None,
+    )
+    override_group.add_argument(
+        "--for",
+        dest="duration",
+        help="Duration for the temporary override (e.g. '3h', '30m'). Required when using --set-min.",
+        default=None,
+    )
+
     autoscale_parser.add_argument(
         "-d",
         "--soa-dir",
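
The --set-min flag validates its value through argparse's type callable: anything below 1 routes to autoscale_parser.error(), which prints the message and exits. A standalone sketch of the same pattern, with an illustrative parser and value:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    "--set-min",
    type=lambda x: int(x) if int(x) >= 1 else parser.error("Minimum instances must be >= 1"),
    default=None,
)

args = parser.parse_args(["--set-min", "3"])
assert args.set_min == 3
# parser.parse_args(["--set-min", "0"]) would print the error and exit with status 2.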

@@ -69,9 +93,46 @@ def add_subparser(subparsers):
     autoscale_parser.set_defaults(command=paasta_autoscale)
 
 
+def parse_duration_to_seconds(duration: str) -> Optional[int]:
+    """Parse a duration string like '3h' or '30m' into seconds.
+
+    Args:
+        duration_str: A string representing a duration (e.g., "3h", "30m", "1d")
+
+    Returns:
+        The duration in seconds, or None if parsing failed
+    """
+    if not duration:
+        return None
+
+    seconds = timeparse(duration)
+    return seconds
+
+
 def paasta_autoscale(args):
     log.setLevel(logging.DEBUG)
     service = figure_out_service_name(args)
+
+    if args.set_min is not None and not args.duration:
+        print(
+            PaastaColors.yellow(
+                "WARNING: --set-min requires --for parameter to specify duration - defaulting to 30m"
+            )
+        )
+        args.duration = "30m"
+
+    if args.duration is not None and args.set_min is None:
+        print(PaastaColors.red("Error: --for requires --set-min parameter"))
+        return 1
+
+    if args.set is not None and args.set_min is not None:
+        print(
+            PaastaColors.red(
+                "Error: Cannot use both --set and --set-min at the same time"
+            )
+        )
+        return 1
+
     instance_config = next(
         get_instance_configs_for_service(
             service=service,
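
parse_duration_to_seconds defers entirely to pytimeparse, so the accepted formats are whatever timeparse understands; a few illustrative values:

from pytimeparse.timeparse import timeparse

assert timeparse("30m") == 1800
assert timeparse("3h") == 10800
assert timeparse("1d3h") == 97200
# Unparseable input yields None, which paasta_autoscale reports as an invalid duration.
assert timeparse("not-a-duration") is None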

@@ -83,7 +144,7 @@ def paasta_autoscale(args):
     )
     if not instance_config:
         print(
-            "Could not find config files for this service instance in soaconfigs. Maybe you
+            "Could not find config files for this service instance in soaconfigs. Maybe you misspelled an argument?"
         )
         return 1
 

@@ -99,12 +160,15 @@ def paasta_autoscale(args):
         return 1
 
     try:
-
+        # get current autoscaler count
+        if args.set is None and args.set_min is None:
             log.debug("Getting the current autoscaler count...")
             res, status, _ = api.autoscaler.get_autoscaler_count(
                 service=service, instance=args.instance, _return_http_data_only=False
             )
-
+
+        # set desired instances
+        elif args.set is not None:
             log.debug(f"Setting desired instances to {args.set}.")
             msg = paastamodels.AutoscalerCountMsg(desired_instances=int(args.set))
             res, status, _ = api.autoscaler.update_autoscaler_count(

@@ -121,16 +185,53 @@ def paasta_autoscale(args):
                 instance=args.instance,
                 cluster=args.cluster,
             )
+
+        # set lower bound
+        elif args.set_min is not None:
+            duration_seconds = parse_duration_to_seconds(args.duration)
+            if not duration_seconds:
+                print(
+                    PaastaColors.red(
+                        f"Error: Invalid duration format '{args.duration}'. "
+                        f"Please use a format like '3h' or '30m'."
+                    )
+                )
+                return 1
+            # NOTE: this is explicitly using time.time() since we're doing everything using epoch time
+            # for simplicity
+            expiration_time = time.time() + duration_seconds
+
+            log.debug(
+                f"Setting minimum instances to {args.set_min} for duration {args.duration}."
+            )
+            msg = paastamodels.AutoscalingOverride(
+                min_instances=args.set_min,
+                expire_after=expiration_time,
+            )
+
+            res, status, _ = api.autoscaler.set_autoscaling_override(
+                service=service,
+                instance=args.instance,
+                autoscaling_override=msg,
+                _return_http_data_only=False,
+            )
+            _log_audit(
+                action="manual-autoscale-override",
+                action_details=str(msg),
+                service=service,
+                instance=args.instance,
+                cluster=args.cluster,
+            )
     except api.api_error as exc:
         status = exc.status
 
     if not 200 <= status <= 299:
         print(
             PaastaColors.red(
-                f"ERROR: '{args.instance}' is not configured to autoscale, "
-                f"
+                f"ERROR: '{args.instance}' is not configured to autoscale OR you set min_instances above the current max_instances, "
+                f"and `paasta autoscale` could not update it. "
                 f"If you want to be able to boost this service, please configure autoscaling for the service "
-                f"in its config file by setting min and max instances. Example: \n"
+                f"in its config file by setting min and max instances appropriately. Example: \n"
                 f"{args.instance}:\n"
                 f" min_instances: 5\n"
                 f" max_instances: 50"

@@ -139,5 +240,23 @@ def paasta_autoscale(args):
         return 0
 
     log.debug(f"Res: {res} Http: {status}")
-
+    if not args.set_min:
+        print(f"Desired instances: {res.desired_instances}")
+    elif args.set_min:
+        print(
+            f"Temporary override set for {args.service}.{args.instance} with minimum instances: {args.set_min}"
+        )
+        # folks using this might be in different timezones, so let's convert the expiration time to a few common ones
+        # to make it extra clear when the override will expire
+        epoch_time = datetime.fromtimestamp(res.expire_after)
+        eastern_time = epoch_time.astimezone(pytz.timezone("US/Eastern"))
+        pacific_time = epoch_time.astimezone(pytz.timezone("US/Pacific"))
+        london_time = epoch_time.astimezone(pytz.timezone("Europe/London"))
+
+        time_format = "%Y-%m-%d %H:%M:%S %Z%z"
+        print(f"The {args.duration} override will expire at:")
+        print(f"Eastern Time: {eastern_time.strftime(time_format)}")
+        print(f"Pacific Time: {pacific_time.strftime(time_format)}")
+        print(f"London Time: {london_time.strftime(time_format)}")
+
     return 0
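
The expiry echoed back by the API is plain epoch seconds (time.time() plus the parsed duration), which the CLI then renders in a few common timezones. A small sketch of that arithmetic, using an assumed 3h override requested now:

import time
from datetime import datetime

import pytz
from pytimeparse.timeparse import timeparse

expire_after = time.time() + timeparse("3h")

# Same conversion the CLI does: naive local datetime, rendered per timezone.
expiry = datetime.fromtimestamp(expire_after)
for tz in ("US/Eastern", "US/Pacific", "Europe/London"):
    print(tz, expiry.astimezone(pytz.timezone(tz)).strftime("%Y-%m-%d %H:%M:%S %Z%z"))
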
paasta_tools/cli/cmds/logs.py
CHANGED

@@ -178,7 +178,7 @@ def add_subparser(subparsers) -> None:
         help=(
             "The time to get logs up to. "
             'This can be an ISO-8601 timestamp or a human readable duration parsable by pytimeparse such as "5m", "1d3h" etc. '
-            "
+            "Incompatible with --line-offset and --lines. "
             "Defaults to now."
         ),
     )

@@ -190,7 +190,7 @@ def add_subparser(subparsers) -> None:
         help=(
             "The number of lines to retrieve from the specified offset. "
             'May optionally be prefixed with a "+" or "-" to specify which direction from the offset. '
-            "
+            "Incompatible with --from and --to. "
             'Defaults to "-100".'
         ),
         type=int,

@@ -204,7 +204,7 @@ def add_subparser(subparsers) -> None:
             "For example, --line-offset 1 would be the first line. "
             "Paired with --lines, --line-offset +100 would give you the first 100 lines of logs. "
             "Some logging backends may not support line offsetting by time or lines. "
-            "
+            "Incompatible with --from and --to. "
             "Defaults to the latest line's offset."
         ),
         type=int,

@@ -804,7 +804,7 @@ class ScribeLogReader(LogReader):
            # sure all the tailers are still running.
            running_processes = [tt.is_alive() for tt in spawned_processes]
            if not running_processes or not all(running_processes):
-                log.
+                log.warning(
                    "Quitting because I expected %d log tailers to be alive but only %d are alive."
                    % (len(spawned_processes), running_processes.count(True))
                )

@@ -819,12 +819,12 @@ class ScribeLogReader(LogReader):
                    # This extra nested catch is because it's pretty easy to be in
                    # the above try block when the user hits Ctrl-C which otherwise
                    # dumps a stack trace.
-                    log.
+                    log.warning("Terminating.")
                    break
                except KeyboardInterrupt:
                    # Die peacefully rather than printing N threads worth of stack
                    # traces.
-                    log.
+                    log.warning("Terminating.")
                    break
 
    def print_logs_by_time(

@@ -1019,7 +1019,7 @@ class ScribeLogReader(LogReader):
            tzinfo=pytz.utc
        ) - datetime.timedelta(hours=4)
        if end_time > warning_end_time:
-            log.
+            log.warning("Recent logs might be incomplete. Consider tailing instead.")
 
        # scribereader, sadly, is not based on UTC timestamps. It uses YST
        # dates instead.

paasta_tools/cli/schemas/service_schema.json
CHANGED

@@ -10,7 +10,10 @@
             "type": "string"
         },
         "git_url": {
-            "type":
+            "type": [
+                "string",
+                "null"
+            ],
             "pattern": "^git@github.yelpcorp.com:[-a-z]+/[-_a-z0-9]+(\\.git)?$",
             "$comment": "this is obviously very tied to how we name repos at Yelp"
         },
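
With this change, git_url may be either a repo URL matching the Yelp pattern or an explicit null. A small validation sketch using the jsonschema library, with illustrative values:

import jsonschema

git_url_schema = {
    "type": ["string", "null"],
    "pattern": "^git@github.yelpcorp.com:[-a-z]+/[-_a-z0-9]+(\\.git)?$",
}

jsonschema.validate("git@github.yelpcorp.com:services/example-service", git_url_schema)
jsonschema.validate(None, git_url_schema)  # now allowed for repo-less pseudo-services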

paasta_tools/generate_deployments_for_service.py
CHANGED

@@ -130,6 +130,10 @@ def get_deploy_group_mappings(
     v2_mappings: V2_Mappings = {"deployments": {}, "controls": {}}
     git_url = get_git_url(service=service, soa_dir=soa_dir)
 
+    # Some pseudo-services like toolboxes explicitly have no git_url, and therefore no deployments
+    if git_url is None:
+        return mappings, v2_mappings
+
     # Most of the time of this function is in two parts:
     # 1. getting remote refs from git. (Mostly IO, just waiting for git to get back to us.)
     # 2. loading instance configs. (Mostly CPU, copy.deepcopying yaml over and over again)

paasta_tools/kubernetes/application/controller_wrappers.py
CHANGED

@@ -18,6 +18,7 @@ from paasta_tools.kubernetes_tools import create_job
 from paasta_tools.kubernetes_tools import create_pod_disruption_budget
 from paasta_tools.kubernetes_tools import create_stateful_set
 from paasta_tools.kubernetes_tools import ensure_service_account
+from paasta_tools.kubernetes_tools import HpaOverride
 from paasta_tools.kubernetes_tools import KubeClient
 from paasta_tools.kubernetes_tools import KubeDeployment
 from paasta_tools.kubernetes_tools import KubernetesDeploymentConfig

@@ -219,6 +220,15 @@ class Application(ABC):
 
 
 class DeploymentWrapper(Application):
+    def __init__(
+        self,
+        item: Union[V1Deployment, V1StatefulSet],
+        logging=logging.getLogger(__name__),
+        hpa_override: Optional[HpaOverride] = None,
+    ) -> None:
+        super().__init__(item, logging)
+        self.hpa_override = hpa_override
+
     def deep_delete(
         self, kube_client: KubeClient, propagation_policy="Foreground"
     ) -> None:

@@ -290,6 +300,9 @@ class DeploymentWrapper(Application):
             cluster=self.soa_config.cluster,
             kube_client=kube_client,
             namespace=self.item.metadata.namespace,
+            min_instances_override=(
+                self.hpa_override["min_instances"] if self.hpa_override else None
+            ),
         )
 
         hpa_exists = self.exists_hpa(kube_client)

@@ -461,11 +474,12 @@ class JobWrapper(Application):
 
 
 def get_application_wrapper(
-    formatted_application: Union[V1Deployment, V1StatefulSet, V1Job]
+    formatted_application: Union[V1Deployment, V1StatefulSet, V1Job],
+    hpa_override: Optional[HpaOverride] = None,
 ) -> Application:
     app: Application
     if isinstance(formatted_application, V1Deployment):
-        app = DeploymentWrapper(formatted_application)
+        app = DeploymentWrapper(formatted_application, hpa_override=hpa_override)
     elif isinstance(formatted_application, V1StatefulSet):
         app = StatefulSetWrapper(formatted_application)
     elif isinstance(formatted_application, V1Job):