paasta-tools 1.30.8__py3-none-any.whl → 1.30.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- paasta_tools/__init__.py +1 -1
- paasta_tools/api/views/instance.py +9 -2
- paasta_tools/async_utils.py +4 -1
- paasta_tools/bounce_lib.py +8 -5
- paasta_tools/check_services_replication_tools.py +10 -4
- paasta_tools/check_spark_jobs.py +1 -1
- paasta_tools/cli/cli.py +4 -4
- paasta_tools/cli/cmds/logs.py +29 -7
- paasta_tools/cli/cmds/mark_for_deployment.py +2 -2
- paasta_tools/cli/cmds/mesh_status.py +1 -1
- paasta_tools/cli/cmds/remote_run.py +1 -1
- paasta_tools/cli/cmds/rollback.py +1 -1
- paasta_tools/cli/cmds/spark_run.py +3 -3
- paasta_tools/cli/cmds/status.py +24 -21
- paasta_tools/cli/cmds/validate.py +3 -3
- paasta_tools/cli/utils.py +32 -19
- paasta_tools/contrib/check_orphans.py +1 -1
- paasta_tools/contrib/get_running_task_allocation.py +1 -1
- paasta_tools/instance/kubernetes.py +2 -1
- paasta_tools/kubernetes_tools.py +2 -40
- paasta_tools/metrics/metastatus_lib.py +0 -24
- paasta_tools/metrics/metrics_lib.py +12 -3
- paasta_tools/setup_kubernetes_job.py +1 -1
- paasta_tools/setup_tron_namespace.py +2 -2
- paasta_tools/tron_tools.py +1 -1
- paasta_tools/utils.py +2 -9
- {paasta_tools-1.30.8.data → paasta_tools-1.30.10.data}/scripts/check_orphans.py +1 -1
- {paasta_tools-1.30.8.data → paasta_tools-1.30.10.data}/scripts/check_spark_jobs.py +1 -1
- {paasta_tools-1.30.8.data → paasta_tools-1.30.10.data}/scripts/get_running_task_allocation.py +1 -1
- {paasta_tools-1.30.8.data → paasta_tools-1.30.10.data}/scripts/setup_kubernetes_job.py +1 -1
- {paasta_tools-1.30.8.dist-info → paasta_tools-1.30.10.dist-info}/METADATA +2 -2
- {paasta_tools-1.30.8.dist-info → paasta_tools-1.30.10.dist-info}/RECORD +84 -89
- paasta_tools/frameworks/adhoc_scheduler.py +0 -71
- paasta_tools/frameworks/native_scheduler.py +0 -652
- paasta_tools/frameworks/task_store.py +0 -245
- paasta_tools/mesos_maintenance.py +0 -848
- paasta_tools/paasta_native_serviceinit.py +0 -21
- {paasta_tools-1.30.8.data → paasta_tools-1.30.10.data}/scripts/apply_external_resources.py +0 -0
- {paasta_tools-1.30.8.data → paasta_tools-1.30.10.data}/scripts/bounce_log_latency_parser.py +0 -0
- {paasta_tools-1.30.8.data → paasta_tools-1.30.10.data}/scripts/check_autoscaler_max_instances.py +0 -0
- {paasta_tools-1.30.8.data → paasta_tools-1.30.10.data}/scripts/check_cassandracluster_services_replication.py +0 -0
- {paasta_tools-1.30.8.data → paasta_tools-1.30.10.data}/scripts/check_flink_services_health.py +0 -0
- {paasta_tools-1.30.8.data → paasta_tools-1.30.10.data}/scripts/check_kubernetes_api.py +0 -0
- {paasta_tools-1.30.8.data → paasta_tools-1.30.10.data}/scripts/check_kubernetes_services_replication.py +0 -0
- {paasta_tools-1.30.8.data → paasta_tools-1.30.10.data}/scripts/check_manual_oapi_changes.sh +0 -0
- {paasta_tools-1.30.8.data → paasta_tools-1.30.10.data}/scripts/check_oom_events.py +0 -0
- {paasta_tools-1.30.8.data → paasta_tools-1.30.10.data}/scripts/cleanup_kubernetes_cr.py +0 -0
- {paasta_tools-1.30.8.data → paasta_tools-1.30.10.data}/scripts/cleanup_kubernetes_crd.py +0 -0
- {paasta_tools-1.30.8.data → paasta_tools-1.30.10.data}/scripts/cleanup_kubernetes_jobs.py +0 -0
- {paasta_tools-1.30.8.data → paasta_tools-1.30.10.data}/scripts/create_dynamodb_table.py +0 -0
- {paasta_tools-1.30.8.data → paasta_tools-1.30.10.data}/scripts/create_paasta_playground.py +0 -0
- {paasta_tools-1.30.8.data → paasta_tools-1.30.10.data}/scripts/delete_kubernetes_deployments.py +0 -0
- {paasta_tools-1.30.8.data → paasta_tools-1.30.10.data}/scripts/emit_allocated_cpu_metrics.py +0 -0
- {paasta_tools-1.30.8.data → paasta_tools-1.30.10.data}/scripts/generate_all_deployments +0 -0
- {paasta_tools-1.30.8.data → paasta_tools-1.30.10.data}/scripts/generate_authenticating_services.py +0 -0
- {paasta_tools-1.30.8.data → paasta_tools-1.30.10.data}/scripts/generate_deployments_for_service.py +0 -0
- {paasta_tools-1.30.8.data → paasta_tools-1.30.10.data}/scripts/generate_services_file.py +0 -0
- {paasta_tools-1.30.8.data → paasta_tools-1.30.10.data}/scripts/generate_services_yaml.py +0 -0
- {paasta_tools-1.30.8.data → paasta_tools-1.30.10.data}/scripts/habitat_fixer.py +0 -0
- {paasta_tools-1.30.8.data → paasta_tools-1.30.10.data}/scripts/ide_helper.py +0 -0
- {paasta_tools-1.30.8.data → paasta_tools-1.30.10.data}/scripts/is_pod_healthy_in_proxy.py +0 -0
- {paasta_tools-1.30.8.data → paasta_tools-1.30.10.data}/scripts/is_pod_healthy_in_smartstack.py +0 -0
- {paasta_tools-1.30.8.data → paasta_tools-1.30.10.data}/scripts/kill_bad_containers.py +0 -0
- {paasta_tools-1.30.8.data → paasta_tools-1.30.10.data}/scripts/kubernetes_remove_evicted_pods.py +0 -0
- {paasta_tools-1.30.8.data → paasta_tools-1.30.10.data}/scripts/mass-deploy-tag.sh +0 -0
- {paasta_tools-1.30.8.data → paasta_tools-1.30.10.data}/scripts/mock_patch_checker.py +0 -0
- {paasta_tools-1.30.8.data → paasta_tools-1.30.10.data}/scripts/paasta_cleanup_remote_run_resources.py +0 -0
- {paasta_tools-1.30.8.data → paasta_tools-1.30.10.data}/scripts/paasta_cleanup_stale_nodes.py +0 -0
- {paasta_tools-1.30.8.data → paasta_tools-1.30.10.data}/scripts/paasta_deploy_tron_jobs +0 -0
- {paasta_tools-1.30.8.data → paasta_tools-1.30.10.data}/scripts/paasta_execute_docker_command.py +0 -0
- {paasta_tools-1.30.8.data → paasta_tools-1.30.10.data}/scripts/paasta_secrets_sync.py +0 -0
- {paasta_tools-1.30.8.data → paasta_tools-1.30.10.data}/scripts/paasta_tabcomplete.sh +0 -0
- {paasta_tools-1.30.8.data → paasta_tools-1.30.10.data}/scripts/paasta_update_soa_memcpu.py +0 -0
- {paasta_tools-1.30.8.data → paasta_tools-1.30.10.data}/scripts/render_template.py +0 -0
- {paasta_tools-1.30.8.data → paasta_tools-1.30.10.data}/scripts/rightsizer_soaconfigs_update.py +0 -0
- {paasta_tools-1.30.8.data → paasta_tools-1.30.10.data}/scripts/service_shard_remove.py +0 -0
- {paasta_tools-1.30.8.data → paasta_tools-1.30.10.data}/scripts/service_shard_update.py +0 -0
- {paasta_tools-1.30.8.data → paasta_tools-1.30.10.data}/scripts/setup_istio_mesh.py +0 -0
- {paasta_tools-1.30.8.data → paasta_tools-1.30.10.data}/scripts/setup_kubernetes_cr.py +0 -0
- {paasta_tools-1.30.8.data → paasta_tools-1.30.10.data}/scripts/setup_kubernetes_crd.py +0 -0
- {paasta_tools-1.30.8.data → paasta_tools-1.30.10.data}/scripts/setup_kubernetes_internal_crd.py +0 -0
- {paasta_tools-1.30.8.data → paasta_tools-1.30.10.data}/scripts/setup_prometheus_adapter_config.py +0 -0
- {paasta_tools-1.30.8.data → paasta_tools-1.30.10.data}/scripts/shared_ip_check.py +0 -0
- {paasta_tools-1.30.8.data → paasta_tools-1.30.10.data}/scripts/synapse_srv_namespaces_fact.py +0 -0
- {paasta_tools-1.30.8.data → paasta_tools-1.30.10.data}/scripts/timeouts_metrics_prom.py +0 -0
- {paasta_tools-1.30.8.dist-info → paasta_tools-1.30.10.dist-info}/WHEEL +0 -0
- {paasta_tools-1.30.8.dist-info → paasta_tools-1.30.10.dist-info}/entry_points.txt +0 -0
- {paasta_tools-1.30.8.dist-info → paasta_tools-1.30.10.dist-info}/licenses/LICENSE +0 -0
- {paasta_tools-1.30.8.dist-info → paasta_tools-1.30.10.dist-info}/top_level.txt +0 -0
|
@@ -1,848 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python
|
|
2
|
-
# Copyright 2015-2016 Yelp Inc.
|
|
3
|
-
#
|
|
4
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
-
# you may not use this file except in compliance with the License.
|
|
6
|
-
# You may obtain a copy of the License at
|
|
7
|
-
#
|
|
8
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
-
#
|
|
10
|
-
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
-
# See the License for the specific language governing permissions and
|
|
14
|
-
# limitations under the License.
|
|
15
|
-
import argparse
|
|
16
|
-
import datetime
|
|
17
|
-
import json
|
|
18
|
-
import logging
|
|
19
|
-
from socket import gaierror
|
|
20
|
-
from socket import getfqdn
|
|
21
|
-
from socket import gethostbyname
|
|
22
|
-
from typing import List
|
|
23
|
-
from typing import NamedTuple
|
|
24
|
-
from typing import Optional
|
|
25
|
-
|
|
26
|
-
import a_sync
|
|
27
|
-
from dateutil import parser
|
|
28
|
-
from pytimeparse import timeparse
|
|
29
|
-
from requests import Request
|
|
30
|
-
from requests import Session
|
|
31
|
-
from requests.exceptions import HTTPError
|
|
32
|
-
|
|
33
|
-
from paasta_tools.mesos_tools import get_count_running_tasks_on_slave
|
|
34
|
-
from paasta_tools.mesos_tools import get_mesos_config_path
|
|
35
|
-
from paasta_tools.mesos_tools import get_mesos_leader
|
|
36
|
-
from paasta_tools.mesos_tools import get_mesos_master
|
|
37
|
-
from paasta_tools.mesos_tools import MESOS_MASTER_PORT
|
|
38
|
-
from paasta_tools.utils import SystemPaastaConfig
|
|
39
|
-
from paasta_tools.utils import time_cache
|
|
40
|
-
from paasta_tools.utils import to_bytes
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
log = logging.getLogger(__name__)
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
class Hostname(NamedTuple):
|
|
47
|
-
host: str
|
|
48
|
-
ip: str
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
class Credentials(NamedTuple):
|
|
52
|
-
file: str
|
|
53
|
-
principal: str
|
|
54
|
-
secret: str
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
class Resource(NamedTuple):
|
|
58
|
-
name: str
|
|
59
|
-
amount: int
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
MAINTENANCE_ROLE = "maintenance"
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
def base_api(mesos_config_path: Optional[str] = None):
|
|
66
|
-
"""Helper function for making all API requests
|
|
67
|
-
|
|
68
|
-
:returns: a function that can be called to make a request
|
|
69
|
-
"""
|
|
70
|
-
leader = get_mesos_leader(mesos_config_path)
|
|
71
|
-
|
|
72
|
-
def execute_request(method, endpoint, timeout=(3, 2), **kwargs):
|
|
73
|
-
url = "http://%s:%d%s" % (leader, MESOS_MASTER_PORT, endpoint)
|
|
74
|
-
s = Session()
|
|
75
|
-
s.auth = (get_principal(), get_secret())
|
|
76
|
-
req = Request(method, url, **kwargs)
|
|
77
|
-
prepared = s.prepare_request(req)
|
|
78
|
-
try:
|
|
79
|
-
resp = s.send(prepared, timeout=timeout)
|
|
80
|
-
resp.raise_for_status()
|
|
81
|
-
return resp
|
|
82
|
-
except HTTPError:
|
|
83
|
-
raise HTTPError("Error executing API request calling %s." % url)
|
|
84
|
-
|
|
85
|
-
return execute_request
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
def master_api(mesos_config_path: Optional[str] = None):
|
|
89
|
-
"""Helper function for making API requests to the /master API endpoints
|
|
90
|
-
|
|
91
|
-
:returns: a function that can be called to make a request to /master
|
|
92
|
-
"""
|
|
93
|
-
|
|
94
|
-
def execute_master_api_request(method, endpoint, **kwargs):
|
|
95
|
-
base_api_client = base_api(mesos_config_path=mesos_config_path)
|
|
96
|
-
return base_api_client(method, "/master%s" % endpoint, **kwargs)
|
|
97
|
-
|
|
98
|
-
return execute_master_api_request
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
def operator_api(mesos_config_path: Optional[str] = None):
|
|
102
|
-
def execute_operator_api_request(**kwargs):
|
|
103
|
-
base_api_client = base_api(mesos_config_path=mesos_config_path)
|
|
104
|
-
if "headers" in kwargs:
|
|
105
|
-
kwargs["headers"]["Content-Type"] = "application/json"
|
|
106
|
-
else:
|
|
107
|
-
kwargs["headers"] = {"Content-Type": "application/json"}
|
|
108
|
-
data = kwargs.pop("data")
|
|
109
|
-
return base_api_client("POST", "/api/v1", data=json.dumps(data), **kwargs)
|
|
110
|
-
|
|
111
|
-
return execute_operator_api_request
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
def reserve_api():
|
|
115
|
-
"""Helper function for making API requests to the /reserve API endpoints
|
|
116
|
-
|
|
117
|
-
:returns: a function that can be called to make a request to /reserve
|
|
118
|
-
"""
|
|
119
|
-
|
|
120
|
-
def execute_reserve_api_request(method, endpoint, **kwargs):
|
|
121
|
-
master_api_client = master_api()
|
|
122
|
-
return master_api_client(method, "/reserve%s" % endpoint, **kwargs)
|
|
123
|
-
|
|
124
|
-
return execute_reserve_api_request
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
def unreserve_api():
|
|
128
|
-
"""Helper function for making API requests to the /unreserve API endpoints
|
|
129
|
-
|
|
130
|
-
:returns: a function that can be called to make a request to /unreserve
|
|
131
|
-
"""
|
|
132
|
-
|
|
133
|
-
def execute_unreserve_api_request(method, endpoint, **kwargs):
|
|
134
|
-
master_api_client = master_api()
|
|
135
|
-
return master_api_client(method, "/unreserve%s" % endpoint, **kwargs)
|
|
136
|
-
|
|
137
|
-
return execute_unreserve_api_request
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
def maintenance_api():
|
|
141
|
-
"""Helper function for making API requests to the /master/maintenance API endpoints
|
|
142
|
-
|
|
143
|
-
:returns: a function that can be called to make a request to /master/maintenance
|
|
144
|
-
"""
|
|
145
|
-
|
|
146
|
-
def execute_schedule_api_request(method, endpoint, **kwargs):
|
|
147
|
-
master_api_client = master_api()
|
|
148
|
-
return master_api_client(
|
|
149
|
-
method, "/maintenance%s" % endpoint, timeout=(3, 10), **kwargs
|
|
150
|
-
)
|
|
151
|
-
|
|
152
|
-
return execute_schedule_api_request
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
def get_schedule_client():
|
|
156
|
-
"""Helper function for making API requests to the /master/maintenance/schedule API endpoints
|
|
157
|
-
|
|
158
|
-
:returns: a function that can be called to make a request to /master/maintenance/schedule
|
|
159
|
-
"""
|
|
160
|
-
|
|
161
|
-
def execute_schedule_api_request(method, endpoint, **kwargs):
|
|
162
|
-
maintenance_api_client = maintenance_api()
|
|
163
|
-
return maintenance_api_client(method, "/schedule%s" % endpoint, **kwargs)
|
|
164
|
-
|
|
165
|
-
return execute_schedule_api_request
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
def get_maintenance_schedule():
|
|
169
|
-
"""Makes a GET_MAINTENANCE_SCHEDULE request to the operator api
|
|
170
|
-
|
|
171
|
-
:returns: a GET_MAINTENANCE_SCHEDULE response
|
|
172
|
-
"""
|
|
173
|
-
client_fn = operator_api()
|
|
174
|
-
return client_fn(data={"type": "GET_MAINTENANCE_SCHEDULE"})
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
@time_cache(ttl=10)
|
|
178
|
-
def get_maintenance_status(mesos_config_path: Optional[str] = None):
|
|
179
|
-
"""Makes a GET_MAINTENANCE_STATUS request to the operator api
|
|
180
|
-
|
|
181
|
-
:returns: a GET_MAINTENANCE_STATUS response
|
|
182
|
-
"""
|
|
183
|
-
client_fn = operator_api(mesos_config_path=mesos_config_path)
|
|
184
|
-
return client_fn(data={"type": "GET_MAINTENANCE_STATUS"})
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
def schedule():
|
|
188
|
-
"""Get the Mesos maintenance schedule. This contains hostname/ip mappings and their maintenance window.
|
|
189
|
-
:returns: GET_MAINTENANCE_SCHEDULE response text
|
|
190
|
-
"""
|
|
191
|
-
try:
|
|
192
|
-
schedule = get_maintenance_schedule()
|
|
193
|
-
except HTTPError:
|
|
194
|
-
raise HTTPError("Error getting maintenance schedule.")
|
|
195
|
-
return schedule.text
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
def get_hosts_with_state(
|
|
199
|
-
state, system_paasta_config: Optional[SystemPaastaConfig] = None
|
|
200
|
-
) -> List[str]:
|
|
201
|
-
"""Helper function to check the maintenance status and return all hosts
|
|
202
|
-
listed as being in a current state
|
|
203
|
-
|
|
204
|
-
:param state: State we are interested in ('down_machines' or 'draining_machines')
|
|
205
|
-
:returns: A list of hostnames in the specified state or an empty list if no machines
|
|
206
|
-
"""
|
|
207
|
-
|
|
208
|
-
mesos_config_path = get_mesos_config_path(system_paasta_config)
|
|
209
|
-
try:
|
|
210
|
-
status = get_maintenance_status(mesos_config_path).json()
|
|
211
|
-
status = status["get_maintenance_status"]["status"]
|
|
212
|
-
except HTTPError:
|
|
213
|
-
raise HTTPError("Error getting maintenance status.")
|
|
214
|
-
if not status or state not in status:
|
|
215
|
-
return []
|
|
216
|
-
if "id" in status[state][0]:
|
|
217
|
-
return [machine["id"]["hostname"] for machine in status[state]]
|
|
218
|
-
else:
|
|
219
|
-
return [machine["hostname"] for machine in status[state]]
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
def get_draining_hosts(system_paasta_config: Optional[SystemPaastaConfig] = None):
|
|
223
|
-
"""Returns a list of hostnames that are marked as draining
|
|
224
|
-
|
|
225
|
-
:returns: a list of strings representing hostnames
|
|
226
|
-
"""
|
|
227
|
-
return get_hosts_with_state(
|
|
228
|
-
state="draining_machines", system_paasta_config=system_paasta_config
|
|
229
|
-
)
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
def get_down_hosts():
|
|
233
|
-
"""Returns a list of hostnames that are marked as down
|
|
234
|
-
|
|
235
|
-
:returns: a list of strings representing hostnames
|
|
236
|
-
"""
|
|
237
|
-
return get_hosts_with_state(state="down_machines")
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
def is_host_draining(hostname=getfqdn()):
|
|
241
|
-
"""Checks if the specified hostname is marked as draining
|
|
242
|
-
|
|
243
|
-
:param hostname: Hostname we want to check if draining (defaults to current host)
|
|
244
|
-
:returns: a boolean representing whether or not the specified hostname is draining
|
|
245
|
-
"""
|
|
246
|
-
return hostname in get_draining_hosts()
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
def is_host_down(hostname=getfqdn()):
|
|
250
|
-
"""Checks if the specified hostname is marked as down
|
|
251
|
-
|
|
252
|
-
:param hostname: Hostname we want to check if down (defaults to current host)
|
|
253
|
-
:returns: a boolean representing whether or not the specified hostname is down
|
|
254
|
-
"""
|
|
255
|
-
return hostname in get_down_hosts()
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
def get_hosts_forgotten_draining(grace=0):
|
|
259
|
-
"""Find hosts that are still marked as draining (rather than down) after the start
|
|
260
|
-
of their maintenance window.
|
|
261
|
-
:param grace: integer number of nanoseconds to allow a host to be left in the draining
|
|
262
|
-
state after the start of its maintenance window before we consider it forgotten.
|
|
263
|
-
:returns: a list of hostnames of hosts forgotten draining
|
|
264
|
-
"""
|
|
265
|
-
draining_hosts = get_draining_hosts()
|
|
266
|
-
log.debug("draining_hosts: %s" % draining_hosts)
|
|
267
|
-
|
|
268
|
-
hosts_past_maintenance_start = get_hosts_past_maintenance_start(grace=grace)
|
|
269
|
-
log.debug("hosts_past_maintenance_start: %s" % hosts_past_maintenance_start)
|
|
270
|
-
|
|
271
|
-
forgotten_draining = list(
|
|
272
|
-
set(draining_hosts).intersection(hosts_past_maintenance_start)
|
|
273
|
-
)
|
|
274
|
-
log.debug("forgotten_draining: %s" % forgotten_draining)
|
|
275
|
-
|
|
276
|
-
return forgotten_draining
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
def are_hosts_forgotten_draining():
|
|
280
|
-
"""Quick way to test if there are any forgotten draining hosts.
|
|
281
|
-
:returns: a boolean that is True if there are any forgotten draining
|
|
282
|
-
hosts and False otherwise
|
|
283
|
-
"""
|
|
284
|
-
return bool(get_hosts_forgotten_draining())
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
def get_hosts_forgotten_down(grace=0):
|
|
288
|
-
"""Find hosts that are still marked as down (rather than up) after the end
|
|
289
|
-
of their maintenance window.
|
|
290
|
-
:param grace: integer number of nanoseconds to allow a host to be left in the down
|
|
291
|
-
state after the end of its maintenance window before we consider it forgotten.
|
|
292
|
-
:returns: a list of hostnames of hosts forgotten down
|
|
293
|
-
"""
|
|
294
|
-
down_hosts = get_down_hosts()
|
|
295
|
-
log.debug("down_hosts: %s" % down_hosts)
|
|
296
|
-
|
|
297
|
-
hosts_past_maintenance_end = get_hosts_past_maintenance_end(grace=grace)
|
|
298
|
-
log.debug("hosts_past_maintenance_end: %s" % hosts_past_maintenance_end)
|
|
299
|
-
|
|
300
|
-
forgotten_down = list(set(down_hosts).intersection(hosts_past_maintenance_end))
|
|
301
|
-
log.debug("forgotten_down: %s" % forgotten_down)
|
|
302
|
-
|
|
303
|
-
return forgotten_down
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
def are_hosts_forgotten_down():
|
|
307
|
-
"""Quick way to test if there are any forgotten down hosts.
|
|
308
|
-
:returns: a boolean that is True if there are any forgotten down
|
|
309
|
-
hosts and False otherwise
|
|
310
|
-
"""
|
|
311
|
-
return bool(get_hosts_forgotten_down())
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
def parse_timedelta(value):
|
|
315
|
-
"""Return the delta in nanoseconds.
|
|
316
|
-
:param value: a string containing a time format supported by :mod:`pytimeparse`
|
|
317
|
-
:returns: an integer (or float) representing the specified delta in nanoseconds
|
|
318
|
-
"""
|
|
319
|
-
error_msg = "'%s' is not a valid time expression" % value
|
|
320
|
-
try:
|
|
321
|
-
seconds = timeparse.timeparse(value)
|
|
322
|
-
except TypeError:
|
|
323
|
-
raise argparse.ArgumentTypeError(error_msg)
|
|
324
|
-
if not seconds:
|
|
325
|
-
raise argparse.ArgumentTypeError(error_msg)
|
|
326
|
-
return seconds_to_nanoseconds(seconds)
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
def parse_datetime(value):
|
|
330
|
-
"""Return the datetime in nanoseconds.
|
|
331
|
-
:param value: a string containing a datetime supported by :mod:`dateutil.parser`
|
|
332
|
-
:returns: an integer (or float) representing the specified datetime in nanoseconds
|
|
333
|
-
"""
|
|
334
|
-
error_msg = "'%s' is not a valid datetime expression" % value
|
|
335
|
-
try:
|
|
336
|
-
dt = parser.parse(value)
|
|
337
|
-
except Exception:
|
|
338
|
-
raise argparse.ArgumentTypeError(error_msg)
|
|
339
|
-
if not dt:
|
|
340
|
-
raise argparse.ArgumentTypeError(error_msg)
|
|
341
|
-
return datetime_to_nanoseconds(dt)
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
def datetime_seconds_from_now(seconds):
|
|
345
|
-
"""Given a number of seconds, returns a datetime object representing that number of seconds in the future from the
|
|
346
|
-
current time.
|
|
347
|
-
:param seconds: an integer representing a certain number of seconds
|
|
348
|
-
:returns: a datetime.timedelta representing now + the specified number of seconds
|
|
349
|
-
"""
|
|
350
|
-
return now() + datetime.timedelta(seconds=seconds)
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
def now():
|
|
354
|
-
"""Returns a datetime object representing the current time
|
|
355
|
-
|
|
356
|
-
:returns: a datetime.datetime object representing the current time
|
|
357
|
-
"""
|
|
358
|
-
return datetime.datetime.now()
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
def seconds_to_nanoseconds(seconds):
|
|
362
|
-
"""Convert the specified number of seconds to nanoseconds
|
|
363
|
-
:param seconds: an integer representing a certain number of seconds
|
|
364
|
-
:returns: an integer (or float) representation of the specified number of seconds as nanoseconds
|
|
365
|
-
"""
|
|
366
|
-
return seconds * 1000000000
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
def datetime_to_nanoseconds(dt):
|
|
370
|
-
"""Convert the provided datetime object into nanoseconds
|
|
371
|
-
|
|
372
|
-
:returns: an integer (or float) representation of the specified datetime as nanoseconds
|
|
373
|
-
"""
|
|
374
|
-
return seconds_to_nanoseconds(int(dt.strftime("%s")))
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
def build_maintenance_payload(hostnames, maint_type):
|
|
378
|
-
"""Creates the JSON payload necessary to bring the specified hostnames up/down for maintenance.
|
|
379
|
-
:param hostnames: a list of hostnames
|
|
380
|
-
:returns: a dictionary representing the list of machines to bring up/down for maintenance
|
|
381
|
-
"""
|
|
382
|
-
return {
|
|
383
|
-
"type": maint_type.upper(),
|
|
384
|
-
maint_type.lower(): {"machines": get_machine_ids(hostnames)},
|
|
385
|
-
}
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
def hostnames_to_components(hostnames, resolve=False):
|
|
389
|
-
"""Converts a list of 'host[|ip]' entries into namedtuples containing 'host' and 'ip' attributes,
|
|
390
|
-
optionally performing a DNS lookup to resolve the hostname into an IP address
|
|
391
|
-
:param hostnames: a list of hostnames where each hostname can be of the form 'host[|ip]'
|
|
392
|
-
:param resolve: boolean representing whether to lookup the IP address corresponding to the hostname via DNS
|
|
393
|
-
:returns: a namedtuple containing the hostname and IP components
|
|
394
|
-
"""
|
|
395
|
-
|
|
396
|
-
components = []
|
|
397
|
-
for hostname in hostnames:
|
|
398
|
-
# This is to allow specifying a hostname as "hostname|ipaddress"
|
|
399
|
-
# to avoid querying DNS for the IP.
|
|
400
|
-
if "|" in hostname:
|
|
401
|
-
(host, ip) = hostname.split("|")
|
|
402
|
-
components.append(Hostname(host=host, ip=ip))
|
|
403
|
-
else:
|
|
404
|
-
try:
|
|
405
|
-
ip = gethostbyname(hostname) if resolve else None
|
|
406
|
-
except gaierror:
|
|
407
|
-
log.error(f"Failed to resolve IP for {hostname}, continuing regardless")
|
|
408
|
-
continue
|
|
409
|
-
components.append(Hostname(host=hostname, ip=ip))
|
|
410
|
-
return components
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
def get_machine_ids(hostnames):
|
|
414
|
-
"""Helper function to convert a list of hostnames into a JSON list of hostname/ip pairs.
|
|
415
|
-
:param hostnames: a list of hostnames
|
|
416
|
-
:returns: a dictionary representing the list of machines to bring up/down for maintenance
|
|
417
|
-
"""
|
|
418
|
-
machine_ids = []
|
|
419
|
-
components = hostnames_to_components(hostnames, resolve=True)
|
|
420
|
-
for component in components:
|
|
421
|
-
machine_id = {"hostname": component.host, "ip": component.ip}
|
|
422
|
-
machine_ids.append(machine_id)
|
|
423
|
-
return machine_ids
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
def build_reservation_payload(resources):
|
|
427
|
-
"""Creates the JSON payload needed to dynamically (un)reserve resources in mesos.
|
|
428
|
-
:param resources: list of Resource named tuples specifying the name and amount of the resource to (un)reserve
|
|
429
|
-
:returns: a dictionary that can be sent to Mesos to (un)reserve resources
|
|
430
|
-
"""
|
|
431
|
-
payload = []
|
|
432
|
-
for resource in resources:
|
|
433
|
-
payload.append(
|
|
434
|
-
{
|
|
435
|
-
"name": resource.name,
|
|
436
|
-
"type": "SCALAR",
|
|
437
|
-
"scalar": {"value": resource.amount},
|
|
438
|
-
"role": MAINTENANCE_ROLE,
|
|
439
|
-
"reservation": {"principal": get_principal()},
|
|
440
|
-
}
|
|
441
|
-
)
|
|
442
|
-
return payload
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
def build_maintenance_schedule_payload(
|
|
446
|
-
hostnames, start=None, duration=None, drain=True
|
|
447
|
-
):
|
|
448
|
-
"""Creates the JSON payload needed to (un)schedule maintenance on the specified hostnames.
|
|
449
|
-
:param hostnames: a list of hostnames
|
|
450
|
-
:param start: the time to start the maintenance, represented as number of nanoseconds since the epoch
|
|
451
|
-
:param duration: length of the maintenance window, represented as number of nanoseconds since the epoch
|
|
452
|
-
:param drain: boolean to note whether we are draining (True) the specified hosts or undraining (False) them
|
|
453
|
-
:returns: a dictionary that can be sent to Mesos to (un)schedule maintenance
|
|
454
|
-
"""
|
|
455
|
-
schedule = get_maintenance_schedule().json()["get_maintenance_schedule"]["schedule"]
|
|
456
|
-
machine_ids = get_machine_ids(hostnames)
|
|
457
|
-
|
|
458
|
-
if drain:
|
|
459
|
-
unavailability = dict()
|
|
460
|
-
unavailability["start"] = dict()
|
|
461
|
-
unavailability["start"]["nanoseconds"] = int(start)
|
|
462
|
-
unavailability["duration"] = dict()
|
|
463
|
-
unavailability["duration"]["nanoseconds"] = int(duration)
|
|
464
|
-
|
|
465
|
-
window = dict()
|
|
466
|
-
window["machine_ids"] = machine_ids
|
|
467
|
-
window["unavailability"] = unavailability
|
|
468
|
-
|
|
469
|
-
if schedule:
|
|
470
|
-
for existing_window in schedule["windows"]:
|
|
471
|
-
for existing_machine_id in existing_window["machine_ids"]:
|
|
472
|
-
# If we already have a maintenance window scheduled for one of the hosts,
|
|
473
|
-
# replace it with the new window.
|
|
474
|
-
if existing_machine_id in machine_ids:
|
|
475
|
-
existing_window["machine_ids"].remove(existing_machine_id)
|
|
476
|
-
if not existing_window["machine_ids"]:
|
|
477
|
-
schedule["windows"].remove(existing_window)
|
|
478
|
-
if drain:
|
|
479
|
-
windows = schedule["windows"] + [window]
|
|
480
|
-
else:
|
|
481
|
-
windows = schedule["windows"]
|
|
482
|
-
elif drain:
|
|
483
|
-
windows = [window]
|
|
484
|
-
else:
|
|
485
|
-
windows = []
|
|
486
|
-
|
|
487
|
-
payload = dict()
|
|
488
|
-
payload["windows"] = windows
|
|
489
|
-
|
|
490
|
-
return {
|
|
491
|
-
"type": "UPDATE_MAINTENANCE_SCHEDULE",
|
|
492
|
-
"update_maintenance_schedule": {"schedule": payload},
|
|
493
|
-
}
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
def load_credentials(mesos_secrets="/nail/etc/mesos-slave-secret"):
|
|
497
|
-
"""Loads the mesos-slave credentials from the specified file. These credentials will be used for all
|
|
498
|
-
maintenance API requests.
|
|
499
|
-
:param mesos_secrets: optional argument specifying the path to the file containing the mesos-slave credentials
|
|
500
|
-
:returns: a tuple of the form (username, password)
|
|
501
|
-
"""
|
|
502
|
-
try:
|
|
503
|
-
with open(mesos_secrets) as data_file:
|
|
504
|
-
data = json.load(data_file)
|
|
505
|
-
except EnvironmentError:
|
|
506
|
-
log.error(
|
|
507
|
-
"maintenance calls must be run on a Mesos slave containing valid credentials (%s)"
|
|
508
|
-
% mesos_secrets
|
|
509
|
-
)
|
|
510
|
-
raise
|
|
511
|
-
try:
|
|
512
|
-
username = data["principal"]
|
|
513
|
-
password = data["secret"]
|
|
514
|
-
except KeyError:
|
|
515
|
-
log.error(
|
|
516
|
-
"%s does not contain Mesos slave credentials in the expected format. "
|
|
517
|
-
"See http://mesos.apache.org/documentation/latest/authentication/ for details"
|
|
518
|
-
% mesos_secrets
|
|
519
|
-
)
|
|
520
|
-
raise
|
|
521
|
-
return Credentials(file=mesos_secrets, principal=username, secret=password)
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
def get_principal(mesos_secrets="/nail/etc/mesos-slave-secret"):
|
|
525
|
-
"""Helper function to get the principal from the mesos-slave credentials
|
|
526
|
-
:param mesos_secrets: optional argument specifying the path to the file containing the mesos-slave credentials
|
|
527
|
-
:returns: a string containing the principal/username
|
|
528
|
-
"""
|
|
529
|
-
return load_credentials(mesos_secrets).principal
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
def get_secret(mesos_secrets="/nail/etc/mesos-slave-secret"):
|
|
533
|
-
"""Helper function to get the secret from the mesos-slave credentials
|
|
534
|
-
:param mesos_secrets: optional argument specifying the path to the file containing the mesos-slave credentials
|
|
535
|
-
:returns: a string containing the secret/password
|
|
536
|
-
"""
|
|
537
|
-
return load_credentials(mesos_secrets).secret
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
def _make_request_payload(slave_id, reservation_payload):
|
|
541
|
-
return {
|
|
542
|
-
"slaveId": slave_id.encode("UTF-8"),
|
|
543
|
-
# We used to_bytes here since py2 json doesn't have a well defined
|
|
544
|
-
# return type. When moving to python 3, replace with .encode()
|
|
545
|
-
"resources": to_bytes(json.dumps(reservation_payload)).replace(b"+", b"%20"),
|
|
546
|
-
}
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
def _make_operator_reservation_request_payload(slave_id, payload, request_type):
|
|
550
|
-
return {
|
|
551
|
-
"type": request_type.upper(),
|
|
552
|
-
request_type.lower(): {"agent_id": {"value": slave_id}},
|
|
553
|
-
"resources": payload,
|
|
554
|
-
}
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
def reserve(slave_id, resources):
|
|
558
|
-
"""Dynamically reserve resources in mesos to prevent tasks from using them.
|
|
559
|
-
:param slave_id: the id of the mesos slave
|
|
560
|
-
:param resources: list of Resource named tuples specifying the name and amount of the resource to (un)reserve
|
|
561
|
-
:returns: boolean where 0 represents success and 1 is a failure
|
|
562
|
-
"""
|
|
563
|
-
log.info(f"Dynamically reserving resources on {slave_id}: {resources}")
|
|
564
|
-
payload = _make_operator_reservation_request_payload(
|
|
565
|
-
slave_id=slave_id,
|
|
566
|
-
payload=build_reservation_payload(resources),
|
|
567
|
-
request_type="reserve_resources",
|
|
568
|
-
)
|
|
569
|
-
client_fn = operator_api()
|
|
570
|
-
try:
|
|
571
|
-
print(payload)
|
|
572
|
-
reserve_output = client_fn(data=payload).text
|
|
573
|
-
except HTTPError:
|
|
574
|
-
raise HTTPError("Error adding dynamic reservation.")
|
|
575
|
-
return reserve_output
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
def unreserve(slave_id, resources):
|
|
579
|
-
"""Dynamically unreserve resources in mesos to allow tasks to using them.
|
|
580
|
-
:param slave_id: the id of the mesos slave
|
|
581
|
-
:param resources: list of Resource named tuples specifying the name and amount of the resource to (un)reserve
|
|
582
|
-
:returns: boolean where 0 represents success and 1 is a failure
|
|
583
|
-
"""
|
|
584
|
-
log.info(f"Dynamically unreserving resources on {slave_id}: {resources}")
|
|
585
|
-
payload = _make_operator_reservation_request_payload(
|
|
586
|
-
slave_id=slave_id,
|
|
587
|
-
payload=build_reservation_payload(resources),
|
|
588
|
-
request_type="unreserve_resources",
|
|
589
|
-
)
|
|
590
|
-
client_fn = operator_api()
|
|
591
|
-
try:
|
|
592
|
-
unreserve_output = client_fn(data=payload).text
|
|
593
|
-
except HTTPError:
|
|
594
|
-
raise HTTPError("Error adding dynamic unreservation.")
|
|
595
|
-
return unreserve_output
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
def components_to_hosts(components):
|
|
599
|
-
"""Convert a list of Component namedtuples to a list of their hosts
|
|
600
|
-
:param components: a list of Component namedtuples
|
|
601
|
-
:returns: list of the hosts associated with each Component
|
|
602
|
-
"""
|
|
603
|
-
hosts = []
|
|
604
|
-
for component in components:
|
|
605
|
-
hosts.append(component.host)
|
|
606
|
-
return hosts
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
def reserve_all_resources(hostnames):
|
|
610
|
-
"""Dynamically reserve all available resources on the specified hosts
|
|
611
|
-
:param hostnames: list of hostnames to reserve resources on
|
|
612
|
-
"""
|
|
613
|
-
mesos_state = a_sync.block(get_mesos_master().state_summary)
|
|
614
|
-
components = hostnames_to_components(hostnames)
|
|
615
|
-
hosts = components_to_hosts(components)
|
|
616
|
-
known_slaves = [
|
|
617
|
-
slave for slave in mesos_state["slaves"] if slave["hostname"] in hosts
|
|
618
|
-
]
|
|
619
|
-
for slave in known_slaves:
|
|
620
|
-
hostname = slave["hostname"]
|
|
621
|
-
log.info("Reserving all resources on %s" % hostname)
|
|
622
|
-
slave_id = slave["id"]
|
|
623
|
-
resources = []
|
|
624
|
-
for resource in ["disk", "mem", "cpus", "gpus"]:
|
|
625
|
-
free_resource = (
|
|
626
|
-
slave["resources"][resource] - slave["used_resources"][resource]
|
|
627
|
-
)
|
|
628
|
-
for role in slave["reserved_resources"]:
|
|
629
|
-
free_resource -= slave["reserved_resources"][role][resource]
|
|
630
|
-
resources.append(Resource(name=resource, amount=free_resource))
|
|
631
|
-
try:
|
|
632
|
-
reserve(slave_id=slave_id, resources=resources)
|
|
633
|
-
except HTTPError:
|
|
634
|
-
raise HTTPError(
|
|
635
|
-
f"Failed reserving all of the resources on {hostname} ({slave_id}). Aborting."
|
|
636
|
-
)
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
def unreserve_all_resources(hostnames):
|
|
640
|
-
"""Dynamically unreserve all available resources on the specified hosts
|
|
641
|
-
:param hostnames: list of hostnames to unreserve resources on
|
|
642
|
-
"""
|
|
643
|
-
mesos_state = a_sync.block(get_mesos_master().state_summary)
|
|
644
|
-
components = hostnames_to_components(hostnames)
|
|
645
|
-
hosts = components_to_hosts(components)
|
|
646
|
-
known_slaves = [
|
|
647
|
-
slave for slave in mesos_state["slaves"] if slave["hostname"] in hosts
|
|
648
|
-
]
|
|
649
|
-
for slave in known_slaves:
|
|
650
|
-
hostname = slave["hostname"]
|
|
651
|
-
log.info("Unreserving all resources on %s" % hostname)
|
|
652
|
-
slave_id = slave["id"]
|
|
653
|
-
resources = []
|
|
654
|
-
if MAINTENANCE_ROLE in slave["reserved_resources"]:
|
|
655
|
-
for resource in ["disk", "mem", "cpus", "gpus"]:
|
|
656
|
-
reserved_resource = slave["reserved_resources"][MAINTENANCE_ROLE][
|
|
657
|
-
resource
|
|
658
|
-
]
|
|
659
|
-
resources.append(Resource(name=resource, amount=reserved_resource))
|
|
660
|
-
try:
|
|
661
|
-
unreserve(slave_id=slave_id, resources=resources)
|
|
662
|
-
except HTTPError:
|
|
663
|
-
raise HTTPError(
|
|
664
|
-
f"Failed unreserving all of the resources on {hostname} ({slave_id}). Aborting."
|
|
665
|
-
)
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
def drain(hostnames, start, duration, reserve_resources=True):
|
|
669
|
-
"""Schedules a maintenance window for the specified hosts and marks them as draining.
|
|
670
|
-
:param hostnames: a list of hostnames
|
|
671
|
-
:param start: the time to start the maintenance, represented as number of nanoseconds since the epoch
|
|
672
|
-
:param duration: length of the maintenance window, represented as number of nanoseconds since the epoch
|
|
673
|
-
:param reserve_resources: bool setting to also reserve the free resources on the agent before the drain call
|
|
674
|
-
:returns: None
|
|
675
|
-
"""
|
|
676
|
-
log.info("Draining: %s" % hostnames)
|
|
677
|
-
if reserve_resources:
|
|
678
|
-
try:
|
|
679
|
-
reserve_all_resources(hostnames)
|
|
680
|
-
except HTTPError as e:
|
|
681
|
-
log.warning("Failed to reserve resources, will continue to drain: %s" % e)
|
|
682
|
-
payload = build_maintenance_schedule_payload(hostnames, start, duration, drain=True)
|
|
683
|
-
client_fn = operator_api()
|
|
684
|
-
try:
|
|
685
|
-
drain_output = client_fn(data=payload).text
|
|
686
|
-
except HTTPError:
|
|
687
|
-
raise HTTPError("Error performing maintenance drain.")
|
|
688
|
-
return drain_output
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
def undrain(hostnames, unreserve_resources=True):
|
|
692
|
-
"""Unschedules the maintenance window for the specified hosts and unmarks them as draining. They are ready for
|
|
693
|
-
regular use.
|
|
694
|
-
:param hostnames: a list of hostnames
|
|
695
|
-
:param unreserve_resources: bool setting to also unreserve resources on the agent before the undrain call
|
|
696
|
-
:returns: None
|
|
697
|
-
"""
|
|
698
|
-
log.info("Undraining: %s" % hostnames)
|
|
699
|
-
if unreserve_resources:
|
|
700
|
-
try:
|
|
701
|
-
unreserve_all_resources(hostnames)
|
|
702
|
-
except HTTPError as e:
|
|
703
|
-
log.warning(
|
|
704
|
-
"Failed to unreserve resources, will continue to undrain: %s" % e
|
|
705
|
-
)
|
|
706
|
-
payload = build_maintenance_schedule_payload(hostnames, drain=False)
|
|
707
|
-
client_fn = get_schedule_client()
|
|
708
|
-
client_fn = operator_api()
|
|
709
|
-
try:
|
|
710
|
-
undrain_output = client_fn(data=payload).text
|
|
711
|
-
except HTTPError:
|
|
712
|
-
raise HTTPError("Error performing maintenance undrain.")
|
|
713
|
-
return undrain_output
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
def down(hostnames):
|
|
717
|
-
"""Marks the specified hostnames as being down for maintenance, and makes them unavailable for use.
|
|
718
|
-
:param hostnames: a list of hostnames
|
|
719
|
-
:returns: None
|
|
720
|
-
"""
|
|
721
|
-
log.info("Bringing down: %s" % hostnames)
|
|
722
|
-
payload = build_maintenance_payload(hostnames, "start_maintenance")
|
|
723
|
-
client_fn = operator_api()
|
|
724
|
-
try:
|
|
725
|
-
down_output = client_fn(data=payload).text
|
|
726
|
-
except HTTPError:
|
|
727
|
-
raise HTTPError("Error performing maintenance down.")
|
|
728
|
-
return down_output
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
def up(hostnames):
|
|
732
|
-
"""Marks the specified hostnames as no longer being down for maintenance, and makes them available for use.
|
|
733
|
-
:param hostnames: a list of hostnames
|
|
734
|
-
:returns: None
|
|
735
|
-
"""
|
|
736
|
-
log.info("Bringing up: %s" % hostnames)
|
|
737
|
-
payload = build_maintenance_payload(hostnames, "stop_maintenance")
|
|
738
|
-
client_fn = operator_api()
|
|
739
|
-
try:
|
|
740
|
-
up_output = client_fn(data=payload).text
|
|
741
|
-
except HTTPError:
|
|
742
|
-
raise HTTPError("Error performing maintenance up.")
|
|
743
|
-
return up_output
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
def raw_status():
|
|
747
|
-
"""Get the Mesos maintenance status. This contains hostname/ip mappings for hosts that are either marked as being
|
|
748
|
-
down for maintenance or draining.
|
|
749
|
-
:returns: Response Object containing status
|
|
750
|
-
"""
|
|
751
|
-
try:
|
|
752
|
-
status = get_maintenance_status()
|
|
753
|
-
except HTTPError:
|
|
754
|
-
raise HTTPError("Error performing maintenance status.")
|
|
755
|
-
return status
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
def status():
|
|
759
|
-
"""Get the Mesos maintenance status. This contains hostname/ip mappings for hosts that are either marked as being
|
|
760
|
-
down for maintenance or draining.
|
|
761
|
-
:returns: Text representation of the status
|
|
762
|
-
"""
|
|
763
|
-
return raw_status().text
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
def friendly_status():
|
|
767
|
-
"""Display the Mesos maintenance status in a human-friendly way.
|
|
768
|
-
:returns: Text representation of the human-friendly status
|
|
769
|
-
"""
|
|
770
|
-
status = raw_status().json()["get_maintenance_status"]["status"]
|
|
771
|
-
ret = ""
|
|
772
|
-
for machine in status.get("draining_machines", []):
|
|
773
|
-
ret += "{} ({}): Draining\n".format(
|
|
774
|
-
machine["id"]["hostname"], machine["id"]["ip"]
|
|
775
|
-
)
|
|
776
|
-
for machine in status.get("down_machines", []):
|
|
777
|
-
ret += "{} ({}): Down\n".format(machine["hostname"], machine["ip"])
|
|
778
|
-
return ret
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
def is_host_drained(hostname):
|
|
782
|
-
"""Checks if a host has drained successfully by confirming it is
|
|
783
|
-
draining and currently running 0 tasks
|
|
784
|
-
:param hostname: hostname to check
|
|
785
|
-
:returns: True or False
|
|
786
|
-
"""
|
|
787
|
-
return (
|
|
788
|
-
is_host_draining(hostname=hostname)
|
|
789
|
-
and get_count_running_tasks_on_slave(hostname) == 0
|
|
790
|
-
)
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
def is_host_past_maintenance_start(hostname):
|
|
794
|
-
"""Checks if a host has reached the start of its maintenance window
|
|
795
|
-
:param hostname: hostname to check
|
|
796
|
-
:returns: True or False
|
|
797
|
-
"""
|
|
798
|
-
return hostname in get_hosts_past_maintenance_start()
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
def is_host_past_maintenance_end(hostname):
|
|
802
|
-
"""Checks if a host has reached the end of its maintenance window
|
|
803
|
-
:param hostname: hostname to check
|
|
804
|
-
:returns: True or False
|
|
805
|
-
"""
|
|
806
|
-
return hostname in get_hosts_past_maintenance_end()
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
def get_hosts_past_maintenance_start(grace=0):
|
|
810
|
-
"""Get a list of hosts that have reached the start of their maintenance window
|
|
811
|
-
:param grace: integer number of nanoseconds to allow a host to be left in the draining
|
|
812
|
-
state after the start of its maintenance window before we consider it past its maintenance start
|
|
813
|
-
:returns: List of hostnames
|
|
814
|
-
"""
|
|
815
|
-
schedules = get_maintenance_schedule().json()["get_maintenance_schedule"][
|
|
816
|
-
"schedule"
|
|
817
|
-
]
|
|
818
|
-
current_time = datetime_to_nanoseconds(now()) - grace
|
|
819
|
-
ret = []
|
|
820
|
-
if "windows" in schedules:
|
|
821
|
-
for window in schedules["windows"]:
|
|
822
|
-
if window["unavailability"]["start"]["nanoseconds"] < current_time:
|
|
823
|
-
ret += [host["hostname"] for host in window["machine_ids"]]
|
|
824
|
-
log.debug(f"Hosts past maintenance start: {ret}")
|
|
825
|
-
return ret
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
def get_hosts_past_maintenance_end(grace=0):
|
|
829
|
-
"""Get a list of hosts that have reached the end of their maintenance window
|
|
830
|
-
:param grace: integer number of nanoseconds to allow a host to be left in the down
|
|
831
|
-
state after the end of its maintenance window before we consider it past its maintenance end
|
|
832
|
-
:returns: List of hostnames
|
|
833
|
-
"""
|
|
834
|
-
schedules = get_maintenance_schedule().json()["get_maintenance_schedule"][
|
|
835
|
-
"schedule"
|
|
836
|
-
]
|
|
837
|
-
current_time = datetime_to_nanoseconds(now()) - grace
|
|
838
|
-
ret = []
|
|
839
|
-
if "windows" in schedules:
|
|
840
|
-
for window in schedules["windows"]:
|
|
841
|
-
end = (
|
|
842
|
-
window["unavailability"]["start"]["nanoseconds"]
|
|
843
|
-
+ window["unavailability"]["duration"]["nanoseconds"]
|
|
844
|
-
)
|
|
845
|
-
if end < current_time:
|
|
846
|
-
ret += [host["hostname"] for host in window["machine_ids"]]
|
|
847
|
-
log.debug(f"Hosts past maintenance end: {ret}")
|
|
848
|
-
return ret
|