paasta-tools 1.30.8__py3-none-any.whl → 1.30.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. paasta_tools/__init__.py +1 -1
  2. paasta_tools/api/views/instance.py +9 -2
  3. paasta_tools/async_utils.py +4 -1
  4. paasta_tools/bounce_lib.py +8 -5
  5. paasta_tools/check_services_replication_tools.py +10 -4
  6. paasta_tools/check_spark_jobs.py +1 -1
  7. paasta_tools/cli/cli.py +4 -4
  8. paasta_tools/cli/cmds/logs.py +29 -7
  9. paasta_tools/cli/cmds/mark_for_deployment.py +2 -2
  10. paasta_tools/cli/cmds/mesh_status.py +1 -1
  11. paasta_tools/cli/cmds/remote_run.py +1 -1
  12. paasta_tools/cli/cmds/rollback.py +1 -1
  13. paasta_tools/cli/cmds/spark_run.py +3 -3
  14. paasta_tools/cli/cmds/status.py +24 -21
  15. paasta_tools/cli/cmds/validate.py +3 -3
  16. paasta_tools/cli/utils.py +32 -19
  17. paasta_tools/contrib/check_orphans.py +1 -1
  18. paasta_tools/contrib/get_running_task_allocation.py +1 -1
  19. paasta_tools/instance/kubernetes.py +2 -1
  20. paasta_tools/kubernetes_tools.py +1 -1
  21. paasta_tools/metrics/metastatus_lib.py +0 -24
  22. paasta_tools/metrics/metrics_lib.py +12 -3
  23. paasta_tools/setup_kubernetes_job.py +1 -1
  24. paasta_tools/setup_tron_namespace.py +2 -2
  25. paasta_tools/tron_tools.py +1 -1
  26. paasta_tools/utils.py +2 -1
  27. {paasta_tools-1.30.8.data → paasta_tools-1.30.9.data}/scripts/check_orphans.py +1 -1
  28. {paasta_tools-1.30.8.data → paasta_tools-1.30.9.data}/scripts/check_spark_jobs.py +1 -1
  29. {paasta_tools-1.30.8.data → paasta_tools-1.30.9.data}/scripts/get_running_task_allocation.py +1 -1
  30. {paasta_tools-1.30.8.data → paasta_tools-1.30.9.data}/scripts/setup_kubernetes_job.py +1 -1
  31. {paasta_tools-1.30.8.dist-info → paasta_tools-1.30.9.dist-info}/METADATA +2 -2
  32. {paasta_tools-1.30.8.dist-info → paasta_tools-1.30.9.dist-info}/RECORD +84 -89
  33. paasta_tools/frameworks/adhoc_scheduler.py +0 -71
  34. paasta_tools/frameworks/native_scheduler.py +0 -652
  35. paasta_tools/frameworks/task_store.py +0 -245
  36. paasta_tools/mesos_maintenance.py +0 -848
  37. paasta_tools/paasta_native_serviceinit.py +0 -21
  38. {paasta_tools-1.30.8.data → paasta_tools-1.30.9.data}/scripts/apply_external_resources.py +0 -0
  39. {paasta_tools-1.30.8.data → paasta_tools-1.30.9.data}/scripts/bounce_log_latency_parser.py +0 -0
  40. {paasta_tools-1.30.8.data → paasta_tools-1.30.9.data}/scripts/check_autoscaler_max_instances.py +0 -0
  41. {paasta_tools-1.30.8.data → paasta_tools-1.30.9.data}/scripts/check_cassandracluster_services_replication.py +0 -0
  42. {paasta_tools-1.30.8.data → paasta_tools-1.30.9.data}/scripts/check_flink_services_health.py +0 -0
  43. {paasta_tools-1.30.8.data → paasta_tools-1.30.9.data}/scripts/check_kubernetes_api.py +0 -0
  44. {paasta_tools-1.30.8.data → paasta_tools-1.30.9.data}/scripts/check_kubernetes_services_replication.py +0 -0
  45. {paasta_tools-1.30.8.data → paasta_tools-1.30.9.data}/scripts/check_manual_oapi_changes.sh +0 -0
  46. {paasta_tools-1.30.8.data → paasta_tools-1.30.9.data}/scripts/check_oom_events.py +0 -0
  47. {paasta_tools-1.30.8.data → paasta_tools-1.30.9.data}/scripts/cleanup_kubernetes_cr.py +0 -0
  48. {paasta_tools-1.30.8.data → paasta_tools-1.30.9.data}/scripts/cleanup_kubernetes_crd.py +0 -0
  49. {paasta_tools-1.30.8.data → paasta_tools-1.30.9.data}/scripts/cleanup_kubernetes_jobs.py +0 -0
  50. {paasta_tools-1.30.8.data → paasta_tools-1.30.9.data}/scripts/create_dynamodb_table.py +0 -0
  51. {paasta_tools-1.30.8.data → paasta_tools-1.30.9.data}/scripts/create_paasta_playground.py +0 -0
  52. {paasta_tools-1.30.8.data → paasta_tools-1.30.9.data}/scripts/delete_kubernetes_deployments.py +0 -0
  53. {paasta_tools-1.30.8.data → paasta_tools-1.30.9.data}/scripts/emit_allocated_cpu_metrics.py +0 -0
  54. {paasta_tools-1.30.8.data → paasta_tools-1.30.9.data}/scripts/generate_all_deployments +0 -0
  55. {paasta_tools-1.30.8.data → paasta_tools-1.30.9.data}/scripts/generate_authenticating_services.py +0 -0
  56. {paasta_tools-1.30.8.data → paasta_tools-1.30.9.data}/scripts/generate_deployments_for_service.py +0 -0
  57. {paasta_tools-1.30.8.data → paasta_tools-1.30.9.data}/scripts/generate_services_file.py +0 -0
  58. {paasta_tools-1.30.8.data → paasta_tools-1.30.9.data}/scripts/generate_services_yaml.py +0 -0
  59. {paasta_tools-1.30.8.data → paasta_tools-1.30.9.data}/scripts/habitat_fixer.py +0 -0
  60. {paasta_tools-1.30.8.data → paasta_tools-1.30.9.data}/scripts/ide_helper.py +0 -0
  61. {paasta_tools-1.30.8.data → paasta_tools-1.30.9.data}/scripts/is_pod_healthy_in_proxy.py +0 -0
  62. {paasta_tools-1.30.8.data → paasta_tools-1.30.9.data}/scripts/is_pod_healthy_in_smartstack.py +0 -0
  63. {paasta_tools-1.30.8.data → paasta_tools-1.30.9.data}/scripts/kill_bad_containers.py +0 -0
  64. {paasta_tools-1.30.8.data → paasta_tools-1.30.9.data}/scripts/kubernetes_remove_evicted_pods.py +0 -0
  65. {paasta_tools-1.30.8.data → paasta_tools-1.30.9.data}/scripts/mass-deploy-tag.sh +0 -0
  66. {paasta_tools-1.30.8.data → paasta_tools-1.30.9.data}/scripts/mock_patch_checker.py +0 -0
  67. {paasta_tools-1.30.8.data → paasta_tools-1.30.9.data}/scripts/paasta_cleanup_remote_run_resources.py +0 -0
  68. {paasta_tools-1.30.8.data → paasta_tools-1.30.9.data}/scripts/paasta_cleanup_stale_nodes.py +0 -0
  69. {paasta_tools-1.30.8.data → paasta_tools-1.30.9.data}/scripts/paasta_deploy_tron_jobs +0 -0
  70. {paasta_tools-1.30.8.data → paasta_tools-1.30.9.data}/scripts/paasta_execute_docker_command.py +0 -0
  71. {paasta_tools-1.30.8.data → paasta_tools-1.30.9.data}/scripts/paasta_secrets_sync.py +0 -0
  72. {paasta_tools-1.30.8.data → paasta_tools-1.30.9.data}/scripts/paasta_tabcomplete.sh +0 -0
  73. {paasta_tools-1.30.8.data → paasta_tools-1.30.9.data}/scripts/paasta_update_soa_memcpu.py +0 -0
  74. {paasta_tools-1.30.8.data → paasta_tools-1.30.9.data}/scripts/render_template.py +0 -0
  75. {paasta_tools-1.30.8.data → paasta_tools-1.30.9.data}/scripts/rightsizer_soaconfigs_update.py +0 -0
  76. {paasta_tools-1.30.8.data → paasta_tools-1.30.9.data}/scripts/service_shard_remove.py +0 -0
  77. {paasta_tools-1.30.8.data → paasta_tools-1.30.9.data}/scripts/service_shard_update.py +0 -0
  78. {paasta_tools-1.30.8.data → paasta_tools-1.30.9.data}/scripts/setup_istio_mesh.py +0 -0
  79. {paasta_tools-1.30.8.data → paasta_tools-1.30.9.data}/scripts/setup_kubernetes_cr.py +0 -0
  80. {paasta_tools-1.30.8.data → paasta_tools-1.30.9.data}/scripts/setup_kubernetes_crd.py +0 -0
  81. {paasta_tools-1.30.8.data → paasta_tools-1.30.9.data}/scripts/setup_kubernetes_internal_crd.py +0 -0
  82. {paasta_tools-1.30.8.data → paasta_tools-1.30.9.data}/scripts/setup_prometheus_adapter_config.py +0 -0
  83. {paasta_tools-1.30.8.data → paasta_tools-1.30.9.data}/scripts/shared_ip_check.py +0 -0
  84. {paasta_tools-1.30.8.data → paasta_tools-1.30.9.data}/scripts/synapse_srv_namespaces_fact.py +0 -0
  85. {paasta_tools-1.30.8.data → paasta_tools-1.30.9.data}/scripts/timeouts_metrics_prom.py +0 -0
  86. {paasta_tools-1.30.8.dist-info → paasta_tools-1.30.9.dist-info}/WHEEL +0 -0
  87. {paasta_tools-1.30.8.dist-info → paasta_tools-1.30.9.dist-info}/entry_points.txt +0 -0
  88. {paasta_tools-1.30.8.dist-info → paasta_tools-1.30.9.dist-info}/licenses/LICENSE +0 -0
  89. {paasta_tools-1.30.8.dist-info → paasta_tools-1.30.9.dist-info}/top_level.txt +0 -0
@@ -1,848 +0,0 @@
1
- #!/usr/bin/env python
2
- # Copyright 2015-2016 Yelp Inc.
3
- #
4
- # Licensed under the Apache License, Version 2.0 (the "License");
5
- # you may not use this file except in compliance with the License.
6
- # You may obtain a copy of the License at
7
- #
8
- # http://www.apache.org/licenses/LICENSE-2.0
9
- #
10
- # Unless required by applicable law or agreed to in writing, software
11
- # distributed under the License is distributed on an "AS IS" BASIS,
12
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
- # See the License for the specific language governing permissions and
14
- # limitations under the License.
15
- import argparse
16
- import datetime
17
- import json
18
- import logging
19
- from socket import gaierror
20
- from socket import getfqdn
21
- from socket import gethostbyname
22
- from typing import List
23
- from typing import NamedTuple
24
- from typing import Optional
25
-
26
- import a_sync
27
- from dateutil import parser
28
- from pytimeparse import timeparse
29
- from requests import Request
30
- from requests import Session
31
- from requests.exceptions import HTTPError
32
-
33
- from paasta_tools.mesos_tools import get_count_running_tasks_on_slave
34
- from paasta_tools.mesos_tools import get_mesos_config_path
35
- from paasta_tools.mesos_tools import get_mesos_leader
36
- from paasta_tools.mesos_tools import get_mesos_master
37
- from paasta_tools.mesos_tools import MESOS_MASTER_PORT
38
- from paasta_tools.utils import SystemPaastaConfig
39
- from paasta_tools.utils import time_cache
40
- from paasta_tools.utils import to_bytes
41
-
42
-
43
- log = logging.getLogger(__name__)
44
-
45
-
46
class Hostname(NamedTuple):
    """A host name paired with its IP address."""

    host: str
    # None when no address was looked up: hostnames_to_components() stores
    # ip=None for entries without an explicit "|ip" part when resolve=False.
    ip: Optional[str]
49
-
50
-
51
class Credentials(NamedTuple):
    """Credentials read from a secrets file, together with the file's path."""

    # path of the file the credentials were loaded from
    file: str
    # value of the "principal" key in that file
    principal: str
    # value of the "secret" key in that file
    secret: str
55
-
56
-
57
class Resource(NamedTuple):
    """A named resource and the quantity of it to reserve or unreserve."""

    # resource name, e.g. one of "disk", "mem", "cpus", "gpus"
    name: str
    # quantity of the resource
    amount: int
60
-
61
-
62
- MAINTENANCE_ROLE = "maintenance"
63
-
64
-
65
def base_api(mesos_config_path: Optional[str] = None):
    """Helper function for making all API requests

    The Mesos leader is looked up once, when this function is called; the
    returned callable reuses it for every request it makes.

    :param mesos_config_path: optional path passed to get_mesos_leader
    :returns: a function ``(method, endpoint, timeout=(3, 2), **kwargs)`` that
        sends an authenticated request to the leader and returns the response
    """
    leader = get_mesos_leader(mesos_config_path)

    def execute_request(method, endpoint, timeout=(3, 2), **kwargs):
        """Send one request to the leader.

        :raises HTTPError: if the response carries an error status
        """
        url = "http://%s:%d%s" % (leader, MESOS_MASTER_PORT, endpoint)
        # The context manager closes the session (and its connection pool)
        # even when the request fails.
        with Session() as session:
            session.auth = (get_principal(), get_secret())
            prepared = session.prepare_request(Request(method, url, **kwargs))
            try:
                resp = session.send(prepared, timeout=timeout)
                resp.raise_for_status()
            except HTTPError as e:
                # Keep the failing response and the original exception
                # attached so callers can still inspect the status code.
                raise HTTPError(
                    "Error executing API request calling %s." % url,
                    response=e.response,
                ) from e
            return resp

    return execute_request
86
-
87
-
88
def master_api(mesos_config_path: Optional[str] = None):
    """Build a request function scoped to the /master API endpoints

    :param mesos_config_path: optional path forwarded to base_api
    :returns: a function ``(method, endpoint, **kwargs)`` that prefixes the
        endpoint with ``/master`` before issuing the request
    """

    def execute_master_api_request(method, endpoint, **kwargs):
        # A new base client is created for every request.
        send = base_api(mesos_config_path=mesos_config_path)
        master_endpoint = "/master%s" % endpoint
        return send(method, master_endpoint, **kwargs)

    return execute_master_api_request
99
-
100
-
101
def operator_api(mesos_config_path: Optional[str] = None):
    """Helper function for making requests to the /api/v1 operator endpoint

    :param mesos_config_path: optional path forwarded to base_api
    :returns: a function taking keyword arguments only; ``data`` is required
        and is JSON-encoded into the body of a POST to ``/api/v1``
    """

    def execute_operator_api_request(**kwargs):
        """POST ``data`` as JSON to the operator endpoint.

        :raises KeyError: if no ``data`` keyword argument is given
        """
        base_api_client = base_api(mesos_config_path=mesos_config_path)
        # Work on a copy so the caller's headers dict is never modified.
        headers = dict(kwargs.pop("headers", None) or {})
        headers["Content-Type"] = "application/json"
        data = kwargs.pop("data")
        return base_api_client(
            "POST", "/api/v1", data=json.dumps(data), headers=headers, **kwargs
        )

    return execute_operator_api_request
112
-
113
-
114
def reserve_api():
    """Build a request function scoped to the /reserve API endpoints

    :returns: a function ``(method, endpoint, **kwargs)`` that prefixes the
        endpoint with ``/reserve`` and sends it through master_api
    """

    def execute_reserve_api_request(method, endpoint, **kwargs):
        send = master_api()
        reserve_endpoint = "/reserve%s" % endpoint
        return send(method, reserve_endpoint, **kwargs)

    return execute_reserve_api_request
125
-
126
-
127
def unreserve_api():
    """Build a request function scoped to the /unreserve API endpoints

    :returns: a function ``(method, endpoint, **kwargs)`` that prefixes the
        endpoint with ``/unreserve`` and sends it through master_api
    """

    def execute_unreserve_api_request(method, endpoint, **kwargs):
        send = master_api()
        unreserve_endpoint = "/unreserve%s" % endpoint
        return send(method, unreserve_endpoint, **kwargs)

    return execute_unreserve_api_request
138
-
139
-
140
def maintenance_api():
    """Build a request function scoped to the /master/maintenance API endpoints

    :returns: a function ``(method, endpoint, **kwargs)`` that prefixes the
        endpoint with ``/maintenance`` and sends it through master_api with a
        ``(3, 10)`` timeout
    """

    def execute_maintenance_api_request(method, endpoint, **kwargs):
        send = master_api()
        maintenance_endpoint = "/maintenance%s" % endpoint
        # These calls override the base client's default timeout.
        return send(method, maintenance_endpoint, timeout=(3, 10), **kwargs)

    return execute_maintenance_api_request
153
-
154
-
155
def get_schedule_client():
    """Build a request function scoped to the /master/maintenance/schedule API endpoints

    :returns: a function ``(method, endpoint, **kwargs)`` that prefixes the
        endpoint with ``/schedule`` and sends it through maintenance_api
    """

    def execute_schedule_api_request(method, endpoint, **kwargs):
        send = maintenance_api()
        schedule_endpoint = "/schedule%s" % endpoint
        return send(method, schedule_endpoint, **kwargs)

    return execute_schedule_api_request
166
-
167
-
168
def get_maintenance_schedule():
    """Send a GET_MAINTENANCE_SCHEDULE call through the operator api

    :returns: the response to the GET_MAINTENANCE_SCHEDULE call
    """
    request_body = {"type": "GET_MAINTENANCE_SCHEDULE"}
    return operator_api()(data=request_body)
175
-
176
-
177
@time_cache(ttl=10)
def get_maintenance_status(mesos_config_path: Optional[str] = None):
    """Send a GET_MAINTENANCE_STATUS call through the operator api

    :param mesos_config_path: optional path forwarded to operator_api
    :returns: the response to the GET_MAINTENANCE_STATUS call
    """
    send = operator_api(mesos_config_path=mesos_config_path)
    request_body = {"type": "GET_MAINTENANCE_STATUS"}
    return send(data=request_body)
185
-
186
-
187
def schedule():
    """Fetch the Mesos maintenance schedule as text.

    The schedule contains hostname/ip mappings and their maintenance window.

    :returns: the text of the GET_MAINTENANCE_SCHEDULE response
    :raises HTTPError: if the schedule request fails
    """
    try:
        response = get_maintenance_schedule()
    except HTTPError:
        raise HTTPError("Error getting maintenance schedule.")
    return response.text
196
-
197
-
198
def get_hosts_with_state(
    state, system_paasta_config: Optional[SystemPaastaConfig] = None
) -> List[str]:
    """Helper function to check the maintenance status and return all hosts
    listed as being in a current state

    :param state: State we are interested in ('down_machines' or 'draining_machines')
    :param system_paasta_config: optional config used to locate the mesos config path
    :returns: A list of hostnames in the specified state or an empty list if no machines
    :raises HTTPError: if the maintenance status request fails
    """
    mesos_config_path = get_mesos_config_path(system_paasta_config)
    try:
        response = get_maintenance_status(mesos_config_path)
    except HTTPError as e:
        raise HTTPError("Error getting maintenance status.") from e
    status = response.json()["get_maintenance_status"]["status"]

    # Treat a missing key and an empty machine list alike; indexing [0] on an
    # empty list would raise IndexError.
    machines = status.get(state) if status else None
    if not machines:
        return []

    # Entries come in two shapes: the hostname nested under "id", or directly
    # on the entry. The first entry decides which shape is read.
    if "id" in machines[0]:
        return [machine["id"]["hostname"] for machine in machines]
    return [machine["hostname"] for machine in machines]
220
-
221
-
222
def get_draining_hosts(system_paasta_config: Optional[SystemPaastaConfig] = None):
    """List the hostnames currently marked as draining

    :param system_paasta_config: optional config forwarded to get_hosts_with_state
    :returns: a list of strings representing hostnames
    """
    draining_state = "draining_machines"
    return get_hosts_with_state(
        state=draining_state, system_paasta_config=system_paasta_config
    )
230
-
231
-
232
def get_down_hosts():
    """List the hostnames currently marked as down

    :returns: a list of strings representing hostnames
    """
    down_state = "down_machines"
    return get_hosts_with_state(state=down_state)
238
-
239
-
240
def is_host_draining(hostname=None):
    """Checks if the specified hostname is marked as draining

    :param hostname: Hostname we want to check if draining (defaults to current host)
    :returns: a boolean representing whether or not the specified hostname is draining
    """
    # Resolve the default at call time. A getfqdn() call in the signature
    # would run once when the module is imported.
    if hostname is None:
        hostname = getfqdn()
    return hostname in get_draining_hosts()
247
-
248
-
249
def is_host_down(hostname=None):
    """Checks if the specified hostname is marked as down

    :param hostname: Hostname we want to check if down (defaults to current host)
    :returns: a boolean representing whether or not the specified hostname is down
    """
    # Resolve the default at call time. A getfqdn() call in the signature
    # would run once when the module is imported.
    if hostname is None:
        hostname = getfqdn()
    return hostname in get_down_hosts()
256
-
257
-
258
def get_hosts_forgotten_draining(grace=0):
    """List hosts still draining (not yet down) after their maintenance window started.

    :param grace: integer number of nanoseconds a host may stay in the draining
        state after the start of its maintenance window before it counts as forgotten
    :returns: a list of hostnames of hosts forgotten draining
    """
    currently_draining = get_draining_hosts()
    log.debug("draining_hosts: %s" % currently_draining)

    past_start = get_hosts_past_maintenance_start(grace=grace)
    log.debug("hosts_past_maintenance_start: %s" % past_start)

    # A host is "forgotten" when it appears in both collections.
    overlap = set(currently_draining).intersection(past_start)
    forgotten = list(overlap)
    log.debug("forgotten_draining: %s" % forgotten)

    return forgotten
277
-
278
-
279
def are_hosts_forgotten_draining():
    """Report whether any host has been forgotten in the draining state.

    :returns: True if get_hosts_forgotten_draining() finds at least one host,
        False otherwise
    """
    forgotten = get_hosts_forgotten_draining()
    return len(forgotten) > 0
285
-
286
-
287
def get_hosts_forgotten_down(grace=0):
    """List hosts still down (not yet up) after their maintenance window ended.

    :param grace: integer number of nanoseconds a host may stay in the down
        state after the end of its maintenance window before it counts as forgotten
    :returns: a list of hostnames of hosts forgotten down
    """
    currently_down = get_down_hosts()
    log.debug("down_hosts: %s" % currently_down)

    past_end = get_hosts_past_maintenance_end(grace=grace)
    log.debug("hosts_past_maintenance_end: %s" % past_end)

    # A host is "forgotten" when it appears in both collections.
    overlap = set(currently_down).intersection(past_end)
    forgotten = list(overlap)
    log.debug("forgotten_down: %s" % forgotten)

    return forgotten
304
-
305
-
306
def are_hosts_forgotten_down():
    """Report whether any host has been forgotten in the down state.

    :returns: True if get_hosts_forgotten_down() finds at least one host,
        False otherwise
    """
    forgotten = get_hosts_forgotten_down()
    return len(forgotten) > 0
312
-
313
-
314
def parse_timedelta(value):
    """Convert a time expression into a delta in nanoseconds.

    :param value: a string containing a time format supported by :mod:`pytimeparse`
    :returns: an integer (or float) representing the specified delta in nanoseconds
    :raises argparse.ArgumentTypeError: if the value cannot be parsed, or
        parses to a falsy number of seconds
    """
    invalid = argparse.ArgumentTypeError("'%s' is not a valid time expression" % value)
    try:
        parsed_seconds = timeparse.timeparse(value)
    except TypeError:
        raise invalid
    if not parsed_seconds:
        raise invalid
    return seconds_to_nanoseconds(parsed_seconds)
327
-
328
-
329
def parse_datetime(value):
    """Convert a datetime expression into nanoseconds.

    :param value: a string containing a datetime supported by :mod:`dateutil.parser`
    :returns: an integer (or float) representing the specified datetime in nanoseconds
    :raises argparse.ArgumentTypeError: if the value cannot be parsed
    """
    invalid = argparse.ArgumentTypeError(
        "'%s' is not a valid datetime expression" % value
    )
    try:
        parsed = parser.parse(value)
    except Exception:
        # Any parser failure is reported as a bad command-line argument.
        raise invalid
    if not parsed:
        raise invalid
    return datetime_to_nanoseconds(parsed)
342
-
343
-
344
def datetime_seconds_from_now(seconds):
    """Return the datetime that lies the given number of seconds after the current time.

    :param seconds: an integer representing a certain number of seconds
    :returns: a datetime.datetime representing now + the specified number of seconds
    """
    current = now()
    offset = datetime.timedelta(seconds=seconds)
    return current + offset
351
-
352
-
353
def now():
    """Return the current time.

    Thin wrapper around ``datetime.datetime.now()`` called without a timezone.

    :returns: a datetime.datetime object representing the current time
    """
    current_time = datetime.datetime.now()
    return current_time
359
-
360
-
361
def seconds_to_nanoseconds(seconds):
    """Convert the specified number of seconds to nanoseconds

    :param seconds: an integer representing a certain number of seconds
    :returns: an integer (or float) representation of the specified number of seconds as nanoseconds
    """
    return seconds * 1000000000


def datetime_to_nanoseconds(dt):
    """Convert the provided datetime object into nanoseconds since the epoch

    Sub-second precision is dropped: the timestamp is truncated to whole
    seconds before conversion.

    :param dt: a datetime.datetime; naive values are interpreted as local time
    :returns: an integer representation of the specified datetime as nanoseconds
    """
    # dt.timestamp() is portable and honours tzinfo. strftime("%s") is a
    # platform extension that treats every datetime as local time.
    return seconds_to_nanoseconds(int(dt.timestamp()))
375
-
376
-
377
def build_maintenance_payload(hostnames, maint_type):
    """Build the JSON payload that brings the given hosts up/down for maintenance.

    :param hostnames: a list of hostnames
    :param maint_type: the call name; upper-cased for "type" and lower-cased
        for the key that holds the machine list
    :returns: a dictionary representing the list of machines to bring up/down for maintenance
    """
    call_type = maint_type.upper()
    call_key = maint_type.lower()
    machines = get_machine_ids(hostnames)
    return {"type": call_type, call_key: {"machines": machines}}
386
-
387
-
388
def hostnames_to_components(hostnames, resolve=False):
    """Split 'host[|ip]' entries into Hostname namedtuples.

    Entries with an explicit ``|ip`` part are used as given. For the others
    the IP is looked up via DNS when ``resolve`` is true and is ``None``
    otherwise. Entries whose lookup fails are logged and left out.

    :param hostnames: a list of hostnames where each hostname can be of the form 'host[|ip]'
    :param resolve: boolean representing whether to lookup the IP address corresponding to the hostname via DNS
    :returns: a list of Hostname namedtuples
    """
    parsed = []
    for entry in hostnames:
        # "hostname|ipaddress" lets the caller supply the IP and skip DNS.
        if "|" in entry:
            host, ip = entry.split("|")
            parsed.append(Hostname(host=host, ip=ip))
            continue

        if resolve:
            try:
                address = gethostbyname(entry)
            except gaierror:
                log.error(f"Failed to resolve IP for {entry}, continuing regardless")
                continue
        else:
            address = None
        parsed.append(Hostname(host=entry, ip=address))
    return parsed
411
-
412
-
413
def get_machine_ids(hostnames):
    """Turn a list of hostnames into a list of hostname/ip dictionaries.

    IPs are resolved via hostnames_to_components(..., resolve=True).

    :param hostnames: a list of hostnames
    :returns: a list of ``{"hostname": ..., "ip": ...}`` dictionaries
    """
    return [
        {"hostname": component.host, "ip": component.ip}
        for component in hostnames_to_components(hostnames, resolve=True)
    ]
424
-
425
-
426
def build_reservation_payload(resources):
    """Build the JSON payload used to dynamically (un)reserve resources in mesos.

    Each resource becomes a SCALAR entry under the maintenance role, reserved
    by the principal returned from get_principal().

    :param resources: list of Resource named tuples specifying the name and amount of the resource to (un)reserve
    :returns: a list of dictionaries that can be sent to Mesos to (un)reserve resources
    """
    return [
        {
            "name": item.name,
            "type": "SCALAR",
            "scalar": {"value": item.amount},
            "role": MAINTENANCE_ROLE,
            "reservation": {"principal": get_principal()},
        }
        for item in resources
    ]
443
-
444
-
445
def build_maintenance_schedule_payload(
    hostnames, start=None, duration=None, drain=True
):
    """Creates the JSON payload needed to (un)schedule maintenance on the specified hostnames.

    The current schedule is fetched, the given hosts are removed from every
    existing window (windows left without machines are dropped), and, when
    draining, one new window covering the hosts is appended.

    :param hostnames: a list of hostnames
    :param start: the time to start the maintenance, represented as number of nanoseconds since the epoch
    :param duration: length of the maintenance window, represented as a number of nanoseconds
    :param drain: boolean to note whether we are draining (True) the specified hosts or undraining (False) them
    :returns: a dictionary that can be sent to Mesos to (un)schedule maintenance
    """
    schedule = get_maintenance_schedule().json()["get_maintenance_schedule"]["schedule"]
    machine_ids = get_machine_ids(hostnames)

    new_window = None
    if drain:
        new_window = {
            "machine_ids": machine_ids,
            "unavailability": {
                "start": {"nanoseconds": int(start)},
                "duration": {"nanoseconds": int(duration)},
            },
        }

    # Build new lists rather than removing items from the lists being iterated:
    # removal during iteration skips the element after each removed one, which
    # left stale machine entries and windows in the schedule.
    windows = []
    for existing_window in (schedule or {}).get("windows", []):
        existing_ids = existing_window["machine_ids"]
        remaining_ids = [
            machine_id for machine_id in existing_ids if machine_id not in machine_ids
        ]
        if len(remaining_ids) == len(existing_ids):
            # Window does not involve any of our hosts: keep it untouched.
            windows.append(existing_window)
        elif remaining_ids:
            windows.append({**existing_window, "machine_ids": remaining_ids})
        # else: every machine in the window is being rescheduled, so drop it.

    if new_window is not None:
        windows.append(new_window)

    return {
        "type": "UPDATE_MAINTENANCE_SCHEDULE",
        "update_maintenance_schedule": {"schedule": {"windows": windows}},
    }
494
-
495
-
496
def load_credentials(mesos_secrets="/nail/etc/mesos-slave-secret"):
    """Read the mesos-slave credentials from a JSON file.

    These credentials are used for all maintenance API requests.

    :param mesos_secrets: optional argument specifying the path to the file containing the mesos-slave credentials
    :returns: a Credentials namedtuple of the form (file, principal, secret)
    :raises EnvironmentError: if the file cannot be opened or read
    :raises KeyError: if the file lacks the "principal" or "secret" key
    """
    try:
        with open(mesos_secrets) as secrets_file:
            parsed = json.load(secrets_file)
    except EnvironmentError:
        log.error(
            "maintenance calls must be run on a Mesos slave containing valid credentials (%s)"
            % mesos_secrets
        )
        raise

    try:
        principal, secret = parsed["principal"], parsed["secret"]
    except KeyError:
        log.error(
            "%s does not contain Mesos slave credentials in the expected format. "
            "See http://mesos.apache.org/documentation/latest/authentication/ for details"
            % mesos_secrets
        )
        raise

    return Credentials(file=mesos_secrets, principal=principal, secret=secret)
522
-
523
-
524
def get_principal(mesos_secrets="/nail/etc/mesos-slave-secret"):
    """Return the principal stored in the mesos-slave credentials file

    :param mesos_secrets: optional argument specifying the path to the file containing the mesos-slave credentials
    :returns: a string containing the principal/username
    """
    credentials = load_credentials(mesos_secrets)
    return credentials.principal
530
-
531
-
532
def get_secret(mesos_secrets="/nail/etc/mesos-slave-secret"):
    """Return the secret stored in the mesos-slave credentials file

    :param mesos_secrets: optional argument specifying the path to the file containing the mesos-slave credentials
    :returns: a string containing the secret/password
    """
    credentials = load_credentials(mesos_secrets)
    return credentials.secret
538
-
539
-
540
def _make_request_payload(slave_id, reservation_payload):
    """Build the payload with a UTF-8 encoded slave id and JSON-encoded resources.

    :param slave_id: the id of the mesos slave, as a string
    :param reservation_payload: JSON-serializable reservation description
    :returns: a dictionary with bytes values under "slaveId" and "resources"
    """
    encoded_slave_id = slave_id.encode("UTF-8")
    # to_bytes dates from py2, whose json had no well defined return type;
    # on python 3 it can be replaced with .encode()
    serialized = to_bytes(json.dumps(reservation_payload))
    encoded_resources = serialized.replace(b"+", b"%20")
    return {"slaveId": encoded_slave_id, "resources": encoded_resources}
547
-
548
-
549
- def _make_operator_reservation_request_payload(slave_id, payload, request_type):
550
- return {
551
- "type": request_type.upper(),
552
- request_type.lower(): {"agent_id": {"value": slave_id}},
553
- "resources": payload,
554
- }
555
-
556
-
557
def reserve(slave_id, resources):
    """Dynamically reserve resources in mesos to prevent tasks from using them.

    :param slave_id: the id of the mesos slave
    :param resources: list of Resource named tuples specifying the name and amount of the resource to (un)reserve
    :returns: the text of the operator API response
    :raises HTTPError: if the reservation request fails
    """
    log.info(f"Dynamically reserving resources on {slave_id}: {resources}")
    payload = _make_operator_reservation_request_payload(
        slave_id=slave_id,
        payload=build_reservation_payload(resources),
        request_type="reserve_resources",
    )
    client_fn = operator_api()
    # Debug output goes through the logger instead of a bare print to stdout,
    # matching unreserve().
    log.debug("Reservation payload: %s", payload)
    try:
        reserve_output = client_fn(data=payload).text
    except HTTPError as e:
        raise HTTPError("Error adding dynamic reservation.") from e
    return reserve_output
576
-
577
-
578
def unreserve(slave_id, resources):
    """Dynamically unreserve resources in mesos so that tasks can use them again.

    :param slave_id: the id of the mesos slave
    :param resources: list of Resource named tuples specifying the name and amount of the resource to (un)reserve
    :returns: the text of the operator API response
    :raises HTTPError: if the unreservation request fails
    """
    log.info(f"Dynamically unreserving resources on {slave_id}: {resources}")
    request_body = _make_operator_reservation_request_payload(
        slave_id=slave_id,
        payload=build_reservation_payload(resources),
        request_type="unreserve_resources",
    )
    send = operator_api()
    try:
        response = send(data=request_body)
        return response.text
    except HTTPError:
        raise HTTPError("Error adding dynamic unreservation.")
596
-
597
-
598
def components_to_hosts(components):
    """Extract the ``host`` attribute from each component.

    :param components: a list of namedtuples exposing a ``host`` attribute
    :returns: list of the hosts associated with each component, in input order
    """
    return [component.host for component in components]
607
-
608
-
609
def reserve_all_resources(hostnames):
    """Dynamically reserve every free resource on the specified hosts

    For each known slave, the free amount of disk, mem, cpus and gpus is its
    total minus what is used and minus what every role has already reserved.

    :param hostnames: list of hostnames to reserve resources on
    :raises HTTPError: if reserving on any slave fails; remaining slaves are not processed
    """
    mesos_state = a_sync.block(get_mesos_master().state_summary)
    hosts = components_to_hosts(hostnames_to_components(hostnames))
    # Only slaves the master knows about can be reserved on.
    known_slaves = [s for s in mesos_state["slaves"] if s["hostname"] in hosts]

    for slave in known_slaves:
        hostname = slave["hostname"]
        log.info("Reserving all resources on %s" % hostname)
        slave_id = slave["id"]

        resources = []
        for resource in ["disk", "mem", "cpus", "gpus"]:
            free_amount = slave["resources"][resource] - slave["used_resources"][resource]
            for reserved_by_role in slave["reserved_resources"].values():
                free_amount -= reserved_by_role[resource]
            resources.append(Resource(name=resource, amount=free_amount))

        try:
            reserve(slave_id=slave_id, resources=resources)
        except HTTPError:
            raise HTTPError(
                f"Failed reserving all of the resources on {hostname} ({slave_id}). Aborting."
            )
637
-
638
-
639
def unreserve_all_resources(hostnames):
    """Dynamically unreserve all available resources on the specified hosts

    For every slave in the Mesos state summary whose hostname matches one of
    the given hosts, the amounts of disk/mem/cpus/gpus currently reserved under
    ``MAINTENANCE_ROLE`` are passed to ``unreserve``. When a slave has nothing
    reserved under that role, ``unreserve`` is still called with an empty list.

    :param hostnames: list of hostnames to unreserve resources on
    :raises HTTPError: if unreserving on a slave fails; slaves after the
        failing one are not processed
    """
    mesos_state = a_sync.block(get_mesos_master().state_summary)
    components = hostnames_to_components(hostnames)
    # A set makes the per-slave membership test constant time.
    hosts = set(components_to_hosts(components))
    known_slaves = [
        slave for slave in mesos_state["slaves"] if slave["hostname"] in hosts
    ]
    for slave in known_slaves:
        hostname = slave["hostname"]
        log.info("Unreserving all resources on %s", hostname)
        slave_id = slave["id"]
        resources = []
        if MAINTENANCE_ROLE in slave["reserved_resources"]:
            maintenance_reserved = slave["reserved_resources"][MAINTENANCE_ROLE]
            resources = [
                Resource(name=resource, amount=maintenance_reserved[resource])
                for resource in ["disk", "mem", "cpus", "gpus"]
            ]
        try:
            unreserve(slave_id=slave_id, resources=resources)
        except HTTPError as e:
            # Chain the original error so the underlying failure stays visible.
            raise HTTPError(
                f"Failed unreserving all of the resources on {hostname} ({slave_id}). Aborting."
            ) from e
666
-
667
-
668
def drain(hostnames, start, duration, reserve_resources=True):
    """Schedules a maintenance window for the specified hosts and marks them as draining.

    :param hostnames: a list of hostnames
    :param start: the time to start the maintenance, represented as number of nanoseconds since the epoch
    :param duration: length of the maintenance window, in nanoseconds
    :param reserve_resources: bool setting to also reserve the free resources on the agent before the drain call
    :returns: the ``.text`` of the response to the maintenance schedule request
    :raises HTTPError: if the maintenance schedule request fails
    """
    log.info("Draining: %s", hostnames)
    if reserve_resources:
        try:
            reserve_all_resources(hostnames)
        except HTTPError as e:
            # Reservation is best-effort: a failure here must not block the drain.
            log.warning("Failed to reserve resources, will continue to drain: %s", e)
    payload = build_maintenance_schedule_payload(hostnames, start, duration, drain=True)
    client_fn = operator_api()
    try:
        drain_output = client_fn(data=payload).text
    except HTTPError as e:
        # Chain the original error so the underlying failure stays visible.
        raise HTTPError("Error performing maintenance drain.") from e
    return drain_output
689
-
690
-
691
def undrain(hostnames, unreserve_resources=True):
    """Unschedules the maintenance window for the specified hosts and unmarks them as draining. They are ready for
    regular use.

    :param hostnames: a list of hostnames
    :param unreserve_resources: bool setting to also unreserve resources on the agent before the undrain call
    :returns: the ``.text`` of the response to the maintenance schedule request
    :raises HTTPError: if the maintenance schedule request fails
    """
    log.info("Undraining: %s", hostnames)
    if unreserve_resources:
        try:
            unreserve_all_resources(hostnames)
        except HTTPError as e:
            # Unreservation is best-effort: a failure here must not block the undrain.
            log.warning(
                "Failed to unreserve resources, will continue to undrain: %s", e
            )
    payload = build_maintenance_schedule_payload(hostnames, drain=False)
    # The earlier get_schedule_client() assignment was dropped: its result was
    # overwritten on the next line and never used.
    client_fn = operator_api()
    try:
        undrain_output = client_fn(data=payload).text
    except HTTPError as e:
        # Chain the original error so the underlying failure stays visible.
        raise HTTPError("Error performing maintenance undrain.") from e
    return undrain_output
714
-
715
-
716
def down(hostnames):
    """Marks the specified hostnames as being down for maintenance, and makes them unavailable for use.

    :param hostnames: a list of hostnames
    :returns: the ``.text`` of the response to the ``start_maintenance`` request
    :raises HTTPError: if the request fails
    """
    log.info("Bringing down: %s", hostnames)
    payload = build_maintenance_payload(hostnames, "start_maintenance")
    client_fn = operator_api()
    try:
        down_output = client_fn(data=payload).text
    except HTTPError as e:
        # Chain the original error so the underlying failure stays visible.
        raise HTTPError("Error performing maintenance down.") from e
    return down_output
729
-
730
-
731
def up(hostnames):
    """Marks the specified hostnames as no longer being down for maintenance, and makes them available for use.

    :param hostnames: a list of hostnames
    :returns: the ``.text`` of the response to the ``stop_maintenance`` request
    :raises HTTPError: if the request fails
    """
    log.info("Bringing up: %s", hostnames)
    payload = build_maintenance_payload(hostnames, "stop_maintenance")
    client_fn = operator_api()
    try:
        up_output = client_fn(data=payload).text
    except HTTPError as e:
        # Chain the original error so the underlying failure stays visible.
        raise HTTPError("Error performing maintenance up.") from e
    return up_output
744
-
745
-
746
def raw_status():
    """Get the Mesos maintenance status. This contains hostname/ip mappings for hosts that are either marked as being
    down for maintenance or draining.

    :returns: Response Object containing status
    :raises HTTPError: if fetching the maintenance status fails
    """
    try:
        return get_maintenance_status()
    except HTTPError as e:
        # Chain the original error so the underlying failure stays visible.
        raise HTTPError("Error performing maintenance status.") from e
756
-
757
-
758
def status():
    """Fetch the Mesos maintenance status (hosts marked as draining or down
    for maintenance) and return it as text.

    :returns: Text representation of the status
    """
    response = raw_status()
    return response.text
764
-
765
-
766
def friendly_status():
    """Render the Mesos maintenance status as one line per machine.

    Draining machines are listed first, followed by down machines; each line
    has the form ``hostname (ip): State``.

    :returns: Text representation of the human-friendly status
    """
    maintenance = raw_status().json()["get_maintenance_status"]["status"]
    draining_lines = [
        "{} ({}): Draining\n".format(entry["id"]["hostname"], entry["id"]["ip"])
        for entry in maintenance.get("draining_machines", [])
    ]
    down_lines = [
        "{} ({}): Down\n".format(entry["hostname"], entry["ip"])
        for entry in maintenance.get("down_machines", [])
    ]
    return "".join(draining_lines + down_lines)
779
-
780
-
781
def is_host_drained(hostname):
    """Report whether a host has finished draining: it must be marked as
    draining and have zero running tasks.

    :param hostname: hostname to check
    :returns: True or False
    """
    draining = is_host_draining(hostname=hostname)
    if not draining:
        # Same result as a short-circuiting ``and``: return the falsy value
        # itself and skip the task-count lookup.
        return draining
    running_tasks = get_count_running_tasks_on_slave(hostname)
    return running_tasks == 0
791
-
792
-
793
def is_host_past_maintenance_start(hostname):
    """Report whether the host appears among the hosts whose maintenance
    window has already started.

    :param hostname: hostname to check
    :returns: True or False
    """
    started_hosts = get_hosts_past_maintenance_start()
    return hostname in started_hosts
799
-
800
-
801
def is_host_past_maintenance_end(hostname):
    """Report whether the host appears among the hosts whose maintenance
    window has already ended.

    :param hostname: hostname to check
    :returns: True or False
    """
    finished_hosts = get_hosts_past_maintenance_end()
    return hostname in finished_hosts
807
-
808
-
809
def get_hosts_past_maintenance_start(grace=0):
    """Collect the hostnames of every scheduled window whose start time is
    earlier than the current time minus ``grace``.

    :param grace: integer number of nanoseconds to allow a host to be left in the draining
        state after the start of its maintenance window before we consider it past its maintenance start
    :returns: List of hostnames
    """
    response = get_maintenance_schedule()
    schedules = response.json()["get_maintenance_schedule"]["schedule"]
    current_time = datetime_to_nanoseconds(now()) - grace
    ret = []
    # A schedule without a "windows" key contributes no hosts.
    windows = schedules["windows"] if "windows" in schedules else []
    for window in windows:
        window_start = window["unavailability"]["start"]["nanoseconds"]
        if window_start < current_time:
            ret.extend(host["hostname"] for host in window["machine_ids"])
    log.debug(f"Hosts past maintenance start: {ret}")
    return ret
826
-
827
-
828
def get_hosts_past_maintenance_end(grace=0):
    """Collect the hostnames of every scheduled window whose end time (start
    plus duration) is earlier than the current time minus ``grace``.

    :param grace: integer number of nanoseconds to allow a host to be left in the down
        state after the end of its maintenance window before we consider it past its maintenance end
    :returns: List of hostnames
    """
    response = get_maintenance_schedule()
    schedules = response.json()["get_maintenance_schedule"]["schedule"]
    current_time = datetime_to_nanoseconds(now()) - grace
    ret = []
    # A schedule without a "windows" key contributes no hosts.
    windows = schedules["windows"] if "windows" in schedules else []
    for window in windows:
        unavailability = window["unavailability"]
        end = (
            unavailability["start"]["nanoseconds"]
            + unavailability["duration"]["nanoseconds"]
        )
        if end < current_time:
            ret.extend(host["hostname"] for host in window["machine_ids"])
    log.debug(f"Hosts past maintenance end: {ret}")
    return ret