kaqing 2.0.115__py3-none-any.whl → 2.0.172__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kaqing might be problematic. Click here for more details.
- adam/__init__.py +0 -2
- adam/app_session.py +8 -11
- adam/batch.py +3 -3
- adam/checks/check_utils.py +14 -46
- adam/checks/cpu.py +7 -1
- adam/checks/cpu_metrics.py +52 -0
- adam/checks/disk.py +2 -3
- adam/columns/columns.py +3 -1
- adam/columns/cpu.py +3 -1
- adam/columns/cpu_metrics.py +22 -0
- adam/columns/memory.py +3 -4
- adam/commands/__init__.py +18 -0
- adam/commands/alter_tables.py +43 -47
- adam/commands/audit/audit.py +24 -25
- adam/commands/audit/audit_repair_tables.py +14 -17
- adam/commands/audit/audit_run.py +15 -23
- adam/commands/audit/show_last10.py +10 -13
- adam/commands/audit/show_slow10.py +10 -13
- adam/commands/audit/show_top10.py +10 -14
- adam/commands/audit/utils_show_top10.py +2 -3
- adam/commands/bash/__init__.py +5 -0
- adam/commands/bash/bash.py +8 -96
- adam/commands/bash/utils_bash.py +16 -0
- adam/commands/cat.py +14 -19
- adam/commands/cd.py +12 -100
- adam/commands/check.py +20 -21
- adam/commands/cli_commands.py +2 -3
- adam/commands/code.py +20 -23
- adam/commands/command.py +123 -39
- adam/commands/commands_utils.py +8 -17
- adam/commands/cp.py +33 -39
- adam/commands/cql/cql_completions.py +28 -10
- adam/commands/cql/cqlsh.py +10 -30
- adam/commands/cql/utils_cql.py +343 -0
- adam/commands/deploy/code_start.py +7 -10
- adam/commands/deploy/code_stop.py +4 -21
- adam/commands/deploy/code_utils.py +3 -3
- adam/commands/deploy/deploy.py +4 -27
- adam/commands/deploy/deploy_frontend.py +14 -17
- adam/commands/deploy/deploy_pg_agent.py +2 -5
- adam/commands/deploy/deploy_pod.py +65 -73
- adam/commands/deploy/deploy_utils.py +14 -24
- adam/commands/deploy/undeploy.py +4 -27
- adam/commands/deploy/undeploy_frontend.py +4 -7
- adam/commands/deploy/undeploy_pg_agent.py +5 -7
- adam/commands/deploy/undeploy_pod.py +11 -12
- adam/commands/devices/__init__.py +0 -0
- adam/commands/devices/device.py +118 -0
- adam/commands/devices/device_app.py +173 -0
- adam/commands/devices/device_auit_log.py +49 -0
- adam/commands/devices/device_cass.py +185 -0
- adam/commands/devices/device_export.py +86 -0
- adam/commands/devices/device_postgres.py +144 -0
- adam/commands/devices/devices.py +25 -0
- adam/commands/exit.py +1 -4
- adam/commands/export/clean_up_all_export_sessions.py +37 -0
- adam/commands/export/clean_up_export_sessions.py +51 -0
- adam/commands/export/drop_export_database.py +55 -0
- adam/commands/export/drop_export_databases.py +43 -0
- adam/commands/export/export.py +19 -26
- adam/commands/export/export_databases.py +174 -0
- adam/commands/export/export_handlers.py +71 -0
- adam/commands/export/export_select.py +48 -22
- adam/commands/export/export_select_x.py +54 -0
- adam/commands/export/export_use.py +19 -23
- adam/commands/export/exporter.py +353 -0
- adam/commands/export/import_session.py +40 -0
- adam/commands/export/importer.py +67 -0
- adam/commands/export/importer_athena.py +77 -0
- adam/commands/export/importer_sqlite.py +39 -0
- adam/commands/export/show_column_counts.py +54 -0
- adam/commands/export/show_export_databases.py +36 -0
- adam/commands/export/show_export_session.py +48 -0
- adam/commands/export/show_export_sessions.py +44 -0
- adam/commands/export/utils_export.py +223 -162
- adam/commands/help.py +1 -1
- adam/commands/intermediate_command.py +49 -0
- adam/commands/issues.py +11 -43
- adam/commands/kubectl.py +3 -6
- adam/commands/login.py +22 -24
- adam/commands/logs.py +3 -6
- adam/commands/ls.py +11 -128
- adam/commands/medusa/medusa.py +4 -22
- adam/commands/medusa/medusa_backup.py +20 -24
- adam/commands/medusa/medusa_restore.py +29 -33
- adam/commands/medusa/medusa_show_backupjobs.py +14 -18
- adam/commands/medusa/medusa_show_restorejobs.py +11 -18
- adam/commands/nodetool.py +6 -15
- adam/commands/param_get.py +11 -12
- adam/commands/param_set.py +9 -10
- adam/commands/postgres/postgres.py +41 -34
- adam/commands/postgres/postgres_context.py +57 -24
- adam/commands/postgres/postgres_ls.py +4 -8
- adam/commands/postgres/postgres_preview.py +5 -9
- adam/commands/postgres/psql_completions.py +1 -1
- adam/commands/postgres/utils_postgres.py +66 -0
- adam/commands/preview_table.py +5 -44
- adam/commands/pwd.py +14 -47
- adam/commands/reaper/reaper.py +4 -27
- adam/commands/reaper/reaper_forward.py +48 -55
- adam/commands/reaper/reaper_forward_session.py +6 -0
- adam/commands/reaper/reaper_forward_stop.py +10 -16
- adam/commands/reaper/reaper_restart.py +7 -14
- adam/commands/reaper/reaper_run_abort.py +11 -30
- adam/commands/reaper/reaper_runs.py +42 -57
- adam/commands/reaper/reaper_runs_abort.py +29 -49
- adam/commands/reaper/reaper_schedule_activate.py +11 -30
- adam/commands/reaper/reaper_schedule_start.py +10 -29
- adam/commands/reaper/reaper_schedule_stop.py +10 -29
- adam/commands/reaper/reaper_schedules.py +4 -14
- adam/commands/reaper/reaper_status.py +8 -16
- adam/commands/reaper/utils_reaper.py +196 -0
- adam/commands/repair/repair.py +4 -22
- adam/commands/repair/repair_log.py +5 -11
- adam/commands/repair/repair_run.py +27 -34
- adam/commands/repair/repair_scan.py +32 -38
- adam/commands/repair/repair_stop.py +5 -11
- adam/commands/report.py +27 -29
- adam/commands/restart.py +25 -26
- adam/commands/rollout.py +19 -24
- adam/commands/shell.py +10 -4
- adam/commands/show/show.py +10 -25
- adam/commands/show/show_cassandra_repairs.py +35 -0
- adam/commands/show/show_cassandra_status.py +32 -43
- adam/commands/show/show_cassandra_version.py +5 -18
- adam/commands/show/show_commands.py +19 -24
- adam/commands/show/show_host.py +1 -1
- adam/commands/show/show_login.py +20 -27
- adam/commands/show/show_processes.py +15 -19
- adam/commands/show/show_storage.py +10 -20
- adam/commands/watch.py +26 -29
- adam/config.py +5 -14
- adam/embedded_params.py +1 -1
- adam/log.py +4 -4
- adam/pod_exec_result.py +3 -3
- adam/repl.py +40 -103
- adam/repl_commands.py +32 -16
- adam/repl_state.py +57 -28
- adam/sql/sql_completer.py +44 -28
- adam/sql/sql_state_machine.py +89 -28
- adam/sso/authn_ad.py +6 -8
- adam/sso/authn_okta.py +4 -6
- adam/sso/cred_cache.py +3 -5
- adam/sso/idp.py +9 -12
- adam/utils.py +435 -6
- adam/utils_athena.py +57 -37
- adam/utils_audits.py +12 -14
- adam/utils_issues.py +32 -0
- adam/utils_k8s/app_clusters.py +13 -18
- adam/utils_k8s/app_pods.py +2 -0
- adam/utils_k8s/cassandra_clusters.py +22 -19
- adam/utils_k8s/cassandra_nodes.py +2 -2
- adam/utils_k8s/custom_resources.py +16 -17
- adam/utils_k8s/ingresses.py +2 -2
- adam/utils_k8s/jobs.py +7 -11
- adam/utils_k8s/k8s.py +87 -0
- adam/utils_k8s/pods.py +40 -77
- adam/utils_k8s/secrets.py +4 -4
- adam/utils_k8s/service_accounts.py +5 -4
- adam/utils_k8s/services.py +2 -2
- adam/utils_k8s/statefulsets.py +1 -12
- adam/utils_net.py +4 -4
- adam/utils_repl/__init__.py +0 -0
- adam/utils_repl/automata_completer.py +48 -0
- adam/utils_repl/repl_completer.py +46 -0
- adam/utils_repl/state_machine.py +173 -0
- adam/utils_sqlite.py +137 -0
- adam/version.py +1 -1
- {kaqing-2.0.115.dist-info → kaqing-2.0.172.dist-info}/METADATA +1 -1
- kaqing-2.0.172.dist-info/RECORD +230 -0
- adam/commands/app.py +0 -67
- adam/commands/app_ping.py +0 -44
- adam/commands/cql/cql_utils.py +0 -204
- adam/commands/devices.py +0 -147
- adam/commands/export/export_on_x.py +0 -76
- adam/commands/export/export_rmdbs.py +0 -65
- adam/commands/postgres/postgres_utils.py +0 -31
- adam/commands/reaper/reaper_session.py +0 -159
- adam/commands/show/show_app_actions.py +0 -56
- adam/commands/show/show_app_id.py +0 -47
- adam/commands/show/show_app_queues.py +0 -45
- adam/commands/show/show_repairs.py +0 -47
- adam/utils_export.py +0 -42
- kaqing-2.0.115.dist-info/RECORD +0 -203
- {kaqing-2.0.115.dist-info → kaqing-2.0.172.dist-info}/WHEEL +0 -0
- {kaqing-2.0.115.dist-info → kaqing-2.0.172.dist-info}/entry_points.txt +0 -0
- {kaqing-2.0.115.dist-info → kaqing-2.0.172.dist-info}/top_level.txt +0 -0
adam/__init__.py
CHANGED
adam/app_session.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import threading
|
|
3
3
|
import time
|
|
4
|
-
import traceback
|
|
5
4
|
import requests
|
|
6
5
|
from urllib.parse import urlparse
|
|
7
6
|
|
|
@@ -9,7 +8,7 @@ from adam.log import Log
|
|
|
9
8
|
from adam.sso.idp import Idp
|
|
10
9
|
from adam.sso.idp_login import IdpLogin
|
|
11
10
|
from adam.config import Config
|
|
12
|
-
from adam.utils import json_to_csv, lines_to_tabular, log, log2
|
|
11
|
+
from adam.utils import debug, debug_trace, json_to_csv, lines_to_tabular, log, log2, log_exc
|
|
13
12
|
from adam.apps import Apps
|
|
14
13
|
|
|
15
14
|
class AppLogin:
|
|
@@ -62,11 +61,9 @@ class AppSession:
|
|
|
62
61
|
if r.status_code >= 200 and r.status_code < 300 or r.status_code == 400:
|
|
63
62
|
try:
|
|
64
63
|
js = r.json()
|
|
65
|
-
|
|
64
|
+
with log_exc(js):
|
|
66
65
|
header, lines = json_to_csv(js, delimiter='\t')
|
|
67
66
|
log(lines_to_tabular(lines, header=header, separator='\t'))
|
|
68
|
-
except:
|
|
69
|
-
log(js)
|
|
70
67
|
except:
|
|
71
68
|
if urlparse(r.url).hostname != urlparse(uri).hostname and not retried:
|
|
72
69
|
app_login = app_session.login(idp_uri=app_login.idp_uri, forced=forced, use_token_from_env=False, use_cached_creds=False)
|
|
@@ -76,7 +73,7 @@ class AppSession:
|
|
|
76
73
|
|
|
77
74
|
if r.text:
|
|
78
75
|
log2(f'{r.status_code} {r.url} Failed parsing the results.')
|
|
79
|
-
|
|
76
|
+
debug(r.text)
|
|
80
77
|
else:
|
|
81
78
|
log2(r.status_code)
|
|
82
79
|
log2(r.text)
|
|
@@ -115,7 +112,7 @@ class AppSession:
|
|
|
115
112
|
try:
|
|
116
113
|
# oidc/login may hang
|
|
117
114
|
timeout = Config().get('app.login.timeout', 5)
|
|
118
|
-
|
|
115
|
+
debug(f'-> {idp_login.app_login_url}')
|
|
119
116
|
session.post(idp_login.app_login_url, headers=headers, data=form_data, timeout=timeout)
|
|
120
117
|
except Exception:
|
|
121
118
|
pass
|
|
@@ -133,7 +130,7 @@ class AppSession:
|
|
|
133
130
|
check_uri = Config().get('app.login.session-check-url', 'https://{host}/{env}/{app}/api/8/C3/userSessionToken')
|
|
134
131
|
check_uri = check_uri.replace('{host}', self.host).replace('{env}', self.env).replace('{app}', 'c3')
|
|
135
132
|
r = session.get(check_uri)
|
|
136
|
-
|
|
133
|
+
debug(f'{r.status_code} {check_uri}')
|
|
137
134
|
|
|
138
135
|
res_text = r.text
|
|
139
136
|
js = json.loads(res_text)
|
|
@@ -142,10 +139,10 @@ class AppSession:
|
|
|
142
139
|
break
|
|
143
140
|
|
|
144
141
|
app_access_token = js['signedToken']
|
|
145
|
-
|
|
142
|
+
debug(f'{r.text}')
|
|
146
143
|
|
|
147
144
|
self.app_login = AppLogin(session, app_access_token, idp_uri)
|
|
148
|
-
except Exception:
|
|
145
|
+
except Exception as e:
|
|
149
146
|
try:
|
|
150
147
|
need = urlparse(r.url).hostname
|
|
151
148
|
if idp_login.idp_uri:
|
|
@@ -158,7 +155,7 @@ class AppSession:
|
|
|
158
155
|
log2(f"Invalid username/password.")
|
|
159
156
|
break
|
|
160
157
|
finally:
|
|
161
|
-
|
|
158
|
+
debug_trace()
|
|
162
159
|
|
|
163
160
|
if 'res_text' in locals():
|
|
164
161
|
Log.log_to_file(res_text)
|
adam/batch.py
CHANGED
|
@@ -35,7 +35,7 @@ from adam.cli_group import cli
|
|
|
35
35
|
@click.option('--param', '-v', multiple=True, metavar='<key>=<value>', help='parameter override')
|
|
36
36
|
@click.argument('extra_args', nargs=-1, metavar='repair', type=click.UNPROCESSED)
|
|
37
37
|
def audit(kubeconfig: str, config: str, param: list[str], extra_args):
|
|
38
|
-
run_command(Audit(), kubeconfig, config, param, None, None, None, extra_args)
|
|
38
|
+
run_command(Audit(), kubeconfig, config, param, None, None, None, extra_args, device=ReplState.L)
|
|
39
39
|
|
|
40
40
|
|
|
41
41
|
@cli.command(context_settings=dict(ignore_unknown_options=True, allow_extra_args=True), cls=ClusterOrPodCommandHelper, help='Run a single bash command.')
|
|
@@ -281,14 +281,14 @@ def watch(kubeconfig: str, config: str, param: list[str], cluster: str, namespac
|
|
|
281
281
|
run_command(Watch(), kubeconfig, config, param, cluster, namespace, None, extra_args)
|
|
282
282
|
|
|
283
283
|
|
|
284
|
-
def run_command(cmd: Command, kubeconfig: str, config: str, params: list[str], cluster:str, namespace: str, pod: str, extra_args):
|
|
284
|
+
def run_command(cmd: Command, kubeconfig: str, config: str, params: list[str], cluster:str, namespace: str, pod: str, extra_args, device=ReplState.C):
|
|
285
285
|
is_user_entry = False
|
|
286
286
|
|
|
287
287
|
KubeContext.init_config(kubeconfig, is_user_entry=is_user_entry)
|
|
288
288
|
if not KubeContext.init_params(config, params, is_user_entry=is_user_entry):
|
|
289
289
|
return
|
|
290
290
|
|
|
291
|
-
state = ReplState(ns_sts=cluster, pod=pod, namespace=namespace)
|
|
291
|
+
state = ReplState(device=device, ns_sts=cluster, pod=pod, namespace=namespace)
|
|
292
292
|
if cmd.command() == 'pg' and not extra_args:
|
|
293
293
|
state, _ = state.apply_args(extra_args)
|
|
294
294
|
state.device = ReplState.P
|
adam/checks/check_utils.py
CHANGED
|
@@ -1,7 +1,3 @@
|
|
|
1
|
-
from collections.abc import Callable
|
|
2
|
-
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
3
|
-
import time
|
|
4
|
-
|
|
5
1
|
from adam.checks.check import Check
|
|
6
2
|
from adam.checks.check_context import CheckContext
|
|
7
3
|
from adam.checks.check_result import CheckResult
|
|
@@ -14,10 +10,9 @@ from adam.checks.memory import Memory
|
|
|
14
10
|
from adam.checks.status import Status
|
|
15
11
|
from adam.config import Config
|
|
16
12
|
from adam.utils_k8s.cassandra_nodes import CassandraNodes
|
|
17
|
-
from adam.utils_k8s.kube_context import KubeContext
|
|
18
13
|
from adam.utils_k8s.secrets import Secrets
|
|
19
14
|
from adam.utils_k8s.statefulsets import StatefulSets
|
|
20
|
-
from adam.utils import
|
|
15
|
+
from adam.utils import parallelize, log2
|
|
21
16
|
|
|
22
17
|
def all_checks() -> list[Check]:
|
|
23
18
|
return [CompactionStats(), Cpu(), Gossip(), Memory(), Disk(), Status()]
|
|
@@ -38,57 +33,30 @@ def checks_from_csv(check_str: str):
|
|
|
38
33
|
|
|
39
34
|
return checks
|
|
40
35
|
|
|
41
|
-
def run_checks(cluster: str = None, namespace: str = None, pod: str = None, checks: list[Check] = None,
|
|
36
|
+
def run_checks(cluster: str = None, namespace: str = None, pod: str = None, checks: list[Check] = None, show_out=True):
|
|
42
37
|
if not checks:
|
|
43
38
|
checks = all_checks()
|
|
44
39
|
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
action = 'issues'
|
|
48
|
-
crs: list[CheckResult] = []
|
|
49
|
-
|
|
50
|
-
def on_clusters(f: Callable[[any, list[str]], any]):
|
|
51
|
-
for ss, ns in sss:
|
|
52
|
-
if (not cluster or cluster == ss) and (not namespace or namespace == ns):
|
|
53
|
-
pods = StatefulSets.pods(ss, ns)
|
|
54
|
-
for pod_name in [pod.metadata.name for pod in pods]:
|
|
55
|
-
if not pod or pod == pod_name:
|
|
56
|
-
f(ss, ns, pod_name, show_output)
|
|
57
|
-
|
|
58
|
-
max_workers = Config().action_workers(action, 30)
|
|
59
|
-
if max_workers < 2:
|
|
60
|
-
def serial(ss, ns, pod_name, show_output):
|
|
61
|
-
if not pod or pod == pod_name:
|
|
62
|
-
crs.append(run_checks_on_pod(checks, ss[0], ns, pod_name, show_output))
|
|
63
|
-
|
|
64
|
-
on_clusters(serial)
|
|
65
|
-
else:
|
|
66
|
-
if KubeContext.show_parallelism():
|
|
67
|
-
log2(f'Executing on all nodes from statefulset with {max_workers} workers...')
|
|
68
|
-
start_time = time.time()
|
|
69
|
-
try:
|
|
70
|
-
futures = []
|
|
71
|
-
with ThreadPoolExecutor(max_workers=max_workers) as executor:
|
|
72
|
-
def submit(ss, ns, pod_name, show_output):
|
|
73
|
-
f = executor.submit(run_checks_on_pod, checks, ss, ns, pod_name, show_output,)
|
|
74
|
-
if f: futures.append(f)
|
|
75
|
-
|
|
76
|
-
on_clusters(submit)
|
|
40
|
+
sts_ns: list[tuple[str, str]] = StatefulSets.list_sts_name_and_ns()
|
|
77
41
|
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
42
|
+
sts_ns_pods: list[tuple[str, str, str]] = []
|
|
43
|
+
for sts, ns in sts_ns:
|
|
44
|
+
if (not cluster or cluster == sts) and (not namespace or namespace == ns):
|
|
45
|
+
pods = StatefulSets.pods(sts, ns)
|
|
46
|
+
for pod_name in [pod.metadata.name for pod in pods]:
|
|
47
|
+
if not pod or pod == pod_name:
|
|
48
|
+
sts_ns_pods.append((sts, ns, pod_name))
|
|
82
49
|
|
|
83
|
-
|
|
50
|
+
with parallelize(sts_ns_pods, Config().action_workers('issues', 30), msg='d`Running|Ran checks on {size} pods') as exec:
|
|
51
|
+
return exec.map(lambda sts_ns_pod: run_checks_on_pod(checks, sts_ns_pod[0], sts_ns_pod[1], sts_ns_pod[2], show_out))
|
|
84
52
|
|
|
85
|
-
def run_checks_on_pod(checks: list[Check], cluster: str = None, namespace: str = None, pod: str = None,
|
|
53
|
+
def run_checks_on_pod(checks: list[Check], cluster: str = None, namespace: str = None, pod: str = None, show_out=True):
|
|
86
54
|
host_id = CassandraNodes.get_host_id(pod, namespace)
|
|
87
55
|
user, pw = Secrets.get_user_pass(pod, namespace)
|
|
88
56
|
results = {}
|
|
89
57
|
issues: list[Issue] = []
|
|
90
58
|
for c in checks:
|
|
91
|
-
check_results = c.check(CheckContext(cluster, host_id, pod, namespace, user, pw, show_output=
|
|
59
|
+
check_results = c.check(CheckContext(cluster, host_id, pod, namespace, user, pw, show_output=show_out))
|
|
92
60
|
if check_results.details:
|
|
93
61
|
results = results | {check_results.name: check_results.details}
|
|
94
62
|
if check_results.issues:
|
adam/checks/cpu.py
CHANGED
|
@@ -7,6 +7,7 @@ from adam.checks.issue import Issue
|
|
|
7
7
|
from adam.config import Config
|
|
8
8
|
from adam.utils_k8s.cassandra_nodes import CassandraNodes
|
|
9
9
|
from adam.utils_k8s.custom_resources import CustomResources
|
|
10
|
+
from adam.utils_k8s.pods import Pods
|
|
10
11
|
|
|
11
12
|
class Cpu(Check):
|
|
12
13
|
def name(self):
|
|
@@ -20,10 +21,15 @@ class Cpu(Check):
|
|
|
20
21
|
'namespace': ctx.namespace,
|
|
21
22
|
'statefulset': ctx.statefulset,
|
|
22
23
|
'cpu': 'Unknown',
|
|
23
|
-
'idle': 'Unknown'
|
|
24
|
+
'idle': 'Unknown',
|
|
25
|
+
'limit': 'NA'
|
|
24
26
|
}
|
|
25
27
|
|
|
26
28
|
try:
|
|
29
|
+
container = Pods.get_container(ctx.namespace, ctx.pod, container_name='cassandra')
|
|
30
|
+
if container.resources.limits and "cpu" in container.resources.limits:
|
|
31
|
+
details['limit'] = container.resources.limits["cpu"]
|
|
32
|
+
|
|
27
33
|
idle = 'Unknown'
|
|
28
34
|
result = CassandraNodes.exec(ctx.pod, ctx.namespace, "mpstat 5 2 | grep Average | awk '{print $NF}'", show_out=ctx.show_output)
|
|
29
35
|
lines = result.stdout.strip(' \r\n').split('\n')
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
from kubernetes.utils import parse_quantity
|
|
2
|
+
|
|
3
|
+
from adam.checks.check import Check
|
|
4
|
+
from adam.checks.check_context import CheckContext
|
|
5
|
+
from adam.checks.check_result import CheckResult
|
|
6
|
+
from adam.checks.issue import Issue
|
|
7
|
+
from adam.config import Config
|
|
8
|
+
from adam.utils_k8s.custom_resources import CustomResources
|
|
9
|
+
from adam.utils_k8s.pods import Pods
|
|
10
|
+
|
|
11
|
+
class CpuMetrics(Check):
|
|
12
|
+
def name(self):
|
|
13
|
+
return 'cpu-metrics'
|
|
14
|
+
|
|
15
|
+
def check(self, ctx: CheckContext) -> CheckResult:
|
|
16
|
+
issues: list[Issue] = []
|
|
17
|
+
|
|
18
|
+
details = {
|
|
19
|
+
'name': ctx.pod,
|
|
20
|
+
'namespace': ctx.namespace,
|
|
21
|
+
'statefulset': ctx.statefulset,
|
|
22
|
+
'cpu': 'Unknown',
|
|
23
|
+
'limit': 'NA'
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
try:
|
|
27
|
+
container = Pods.get_container(ctx.namespace, ctx.pod, container_name='cassandra')
|
|
28
|
+
if container.resources.limits and "cpu" in container.resources.limits:
|
|
29
|
+
details['limit'] = container.resources.limits["cpu"]
|
|
30
|
+
|
|
31
|
+
metrics = CustomResources.get_metrics(ctx.namespace, ctx.pod, container_name='cassandra')
|
|
32
|
+
usage = 'Unknown'
|
|
33
|
+
if metrics:
|
|
34
|
+
usage = details['cpu'] = metrics["usage"]["cpu"]
|
|
35
|
+
|
|
36
|
+
cpu_threshold = Config().get('checks.cpu-threshold', 0.0)
|
|
37
|
+
if cpu_threshold != 0.0 and usage != "Unknown" and parse_quantity(usage) > cpu_threshold:
|
|
38
|
+
issues.append(Issue(
|
|
39
|
+
statefulset=ctx.statefulset,
|
|
40
|
+
namespace=ctx.namespace,
|
|
41
|
+
pod=ctx.pod,
|
|
42
|
+
category='cpu',
|
|
43
|
+
desc=f'CPU is too busy: {usage}',
|
|
44
|
+
suggestion=f"qing restart {ctx.pod}@{ctx.namespace}"
|
|
45
|
+
))
|
|
46
|
+
except Exception as e:
|
|
47
|
+
issues.append(self.issue_from_err(sts_name=ctx.statefulset, ns=ctx.namespace, pod_name=ctx.pod, exception=e))
|
|
48
|
+
|
|
49
|
+
return CheckResult(self.name(), details, issues)
|
|
50
|
+
|
|
51
|
+
def help(self):
|
|
52
|
+
return f'{CpuMetrics().name()}: check cpu busy percentage with metrics'
|
adam/checks/disk.py
CHANGED
|
@@ -6,6 +6,7 @@ from adam.checks.check_context import CheckContext
|
|
|
6
6
|
from adam.checks.check_result import CheckResult
|
|
7
7
|
from adam.checks.issue import Issue
|
|
8
8
|
from adam.config import Config
|
|
9
|
+
from adam.utils import log_exc
|
|
9
10
|
from adam.utils_k8s.cassandra_nodes import CassandraNodes
|
|
10
11
|
|
|
11
12
|
class Disk(Check):
|
|
@@ -87,10 +88,8 @@ class Disk(Check):
|
|
|
87
88
|
|
|
88
89
|
ss_size = 0.0
|
|
89
90
|
if ss_out:
|
|
90
|
-
|
|
91
|
+
with log_exc():
|
|
91
92
|
ss_size = round(float(ss_out.strip(' \r\n')) / 1024 / 1024, 2)
|
|
92
|
-
except:
|
|
93
|
-
pass
|
|
94
93
|
|
|
95
94
|
def parse_du_out(l: str, default: str = None):
|
|
96
95
|
groups = re.match(r'^(\S+)\s+(\S+)$', l.strip('\r'))
|
adam/columns/columns.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
from adam.columns.column import Column
|
|
2
2
|
from adam.columns.compactions import Compactions
|
|
3
3
|
from adam.columns.cpu import Cpu
|
|
4
|
+
from adam.columns.cpu_metrics import CpuMetrics
|
|
4
5
|
from adam.columns.dir_data import DataDir
|
|
5
6
|
from adam.columns.dir_snapshots import SnapshotsDir
|
|
6
7
|
from adam.columns.gossip import Gossip
|
|
@@ -23,7 +24,7 @@ class Columns:
|
|
|
23
24
|
COLUMNS_BY_NAME = None
|
|
24
25
|
|
|
25
26
|
def all_columns():
|
|
26
|
-
return [Compactions(), Cpu(), DataDir(), SnapshotsDir(), Gossip(), HostId(), Memory(),
|
|
27
|
+
return [Compactions(), Cpu(), CpuMetrics(), DataDir(), SnapshotsDir(), Gossip(), HostId(), Memory(),
|
|
27
28
|
NodeAddress(), NodeLoad(), NodeOwns(), NodeStatus(),NodeTokens(), PodName(), CassandraVolume(), RootVolume()]
|
|
28
29
|
|
|
29
30
|
def columns_by_name():
|
|
@@ -38,6 +39,7 @@ class Columns:
|
|
|
38
39
|
name = name.strip(' ')
|
|
39
40
|
if not name in Columns.COLUMNS_BY_NAME:
|
|
40
41
|
return None
|
|
42
|
+
|
|
41
43
|
cols.append(Columns.COLUMNS_BY_NAME[name]())
|
|
42
44
|
|
|
43
45
|
return cols
|
adam/columns/cpu.py
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
from kubernetes.utils.quantity import parse_quantity
|
|
2
|
+
|
|
1
3
|
from adam.checks.check_result import CheckResult
|
|
2
4
|
from adam.checks.cpu import Cpu as CpuCheck
|
|
3
5
|
from adam.columns.column import Column
|
|
@@ -14,4 +16,4 @@ class Cpu(Column):
|
|
|
14
16
|
cpu = r.details[CpuCheck().name()]
|
|
15
17
|
busy = 100.0 - float(cpu['idle'])
|
|
16
18
|
|
|
17
|
-
return f'{round(busy)}%'
|
|
19
|
+
return f'{round(busy)}%/{parse_quantity(cpu["limit"]) * 100}%'
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
from kubernetes.utils.quantity import parse_quantity
|
|
2
|
+
|
|
3
|
+
from adam.checks.check_result import CheckResult
|
|
4
|
+
from adam.checks.cpu_metrics import CpuMetrics as CpuCheck
|
|
5
|
+
from adam.columns.column import Column
|
|
6
|
+
|
|
7
|
+
class CpuMetrics(Column):
|
|
8
|
+
def name(self):
|
|
9
|
+
return 'cpu-metrics'
|
|
10
|
+
|
|
11
|
+
def checks(self):
|
|
12
|
+
return [CpuCheck()]
|
|
13
|
+
|
|
14
|
+
def pod_value(self, check_results: list[CheckResult], pod_name: str):
|
|
15
|
+
r = self.result_by_pod(check_results, pod_name)
|
|
16
|
+
cpu = r.details[CpuCheck().name()]
|
|
17
|
+
|
|
18
|
+
cpu_decimal = parse_quantity(cpu['cpu'])
|
|
19
|
+
cpu_limit = parse_quantity(cpu['limit'])
|
|
20
|
+
business = cpu_decimal * 100 / cpu_limit
|
|
21
|
+
|
|
22
|
+
return f"{business:.2f}%({cpu_decimal}/{cpu_limit})"
|
adam/columns/memory.py
CHANGED
|
@@ -3,6 +3,7 @@ from kubernetes.utils import parse_quantity
|
|
|
3
3
|
from adam.checks.check_result import CheckResult
|
|
4
4
|
from adam.checks.memory import Memory as MemoryCheck
|
|
5
5
|
from adam.columns.column import Column
|
|
6
|
+
from adam.utils import log_exc
|
|
6
7
|
|
|
7
8
|
class Memory(Column):
|
|
8
9
|
def name(self):
|
|
@@ -18,7 +19,5 @@ class Memory(Column):
|
|
|
18
19
|
return f"{Memory.to_g(mem['used'])}/{Memory.to_g(mem['limit'])}"
|
|
19
20
|
|
|
20
21
|
def to_g(v: str):
|
|
21
|
-
|
|
22
|
-
return f'{round(parse_quantity(v) / 1024 / 1024 / 1024, 2)}G'
|
|
23
|
-
except:
|
|
24
|
-
return v
|
|
22
|
+
with log_exc():
|
|
23
|
+
return f'{round(parse_quantity(v) / 1024 / 1024 / 1024, 2)}G'
|
adam/commands/__init__.py
CHANGED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
from adam.commands.command import ExtractAllOptionsHandler, ExtractOptionsHandler, ExtractSequenceOptionsHandler, ExtractTrailingOptionsHandler
|
|
2
|
+
from adam.repl_state import ReplState
|
|
3
|
+
from adam.commands.app.utils_app import AppHandler
|
|
4
|
+
|
|
5
|
+
def app(state: ReplState) -> AppHandler:
|
|
6
|
+
return AppHandler(state)
|
|
7
|
+
|
|
8
|
+
def extract_options(args: list[str], options: list[str]):
|
|
9
|
+
return ExtractOptionsHandler(args, options = options)
|
|
10
|
+
|
|
11
|
+
def extract_trailing_options(args: list[str], trailing: list[str]):
|
|
12
|
+
return ExtractTrailingOptionsHandler(args, trailing = trailing)
|
|
13
|
+
|
|
14
|
+
def extract_all_options(args: list[str], trailing = None, sequence = None, options = None):
|
|
15
|
+
return ExtractAllOptionsHandler(args, trailing = trailing, sequence = sequence, options = options)
|
|
16
|
+
|
|
17
|
+
def extract_sequence(args: list[str], sequence: list[str]):
|
|
18
|
+
return ExtractSequenceOptionsHandler(args, sequence = sequence)
|
adam/commands/alter_tables.py
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
|
+
from adam.commands import extract_options
|
|
1
2
|
from adam.commands.command import Command
|
|
2
|
-
from adam.commands.cql.
|
|
3
|
+
from adam.commands.cql.utils_cql import cassandra, cassandra_tables as get_tables
|
|
3
4
|
from adam.config import Config
|
|
4
5
|
from adam.repl_state import ReplState, RequiredState
|
|
5
|
-
from adam.utils import log2
|
|
6
|
+
from adam.utils import log2, log_exc
|
|
6
7
|
|
|
7
8
|
class AlterTables(Command):
|
|
8
9
|
COMMAND = 'alter tables with'
|
|
@@ -26,52 +27,47 @@ class AlterTables(Command):
|
|
|
26
27
|
if not(args := self.args(cmd)):
|
|
27
28
|
return super().run(cmd, state)
|
|
28
29
|
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
for k, v in tables.items():
|
|
52
|
-
if k not in excludes or k == 'reaper_db' and include_reaper:
|
|
53
|
-
if batching:
|
|
54
|
-
# alter table <table_name> with GC_GRACE_SECONDS = <timeout>;
|
|
55
|
-
cql = ';\n'.join([f'alter table {k}.{t} with {arg_str}' for t in v])
|
|
56
|
-
try:
|
|
57
|
-
run_cql(state, cql, [], show_out=Config().is_debug(), on_any=True)
|
|
58
|
-
except Exception as e:
|
|
59
|
-
log2(e)
|
|
60
|
-
continue
|
|
61
|
-
else:
|
|
62
|
-
for t in v:
|
|
63
|
-
try:
|
|
30
|
+
with self.validate(args, state) as (args, state):
|
|
31
|
+
with extract_options(args, '--include-reaper') as (args, include_reaper):
|
|
32
|
+
if not args:
|
|
33
|
+
if state.in_repl:
|
|
34
|
+
log2('Please enter gc grace in seconds. e.g. alter gc-grace-seconds 3600')
|
|
35
|
+
else:
|
|
36
|
+
log2('* gc grace second is missing.')
|
|
37
|
+
log2()
|
|
38
|
+
Command.display_help()
|
|
39
|
+
|
|
40
|
+
return 'missing-arg'
|
|
41
|
+
|
|
42
|
+
arg_str = ' '.join(args)
|
|
43
|
+
|
|
44
|
+
excludes = [e.strip(' \r\n') for e in Config().get(
|
|
45
|
+
'cql.alter-tables.excludes',
|
|
46
|
+
'system_auth,system_traces,reaper_db,system_distributed,system_views,system,system_schema,system_virtual_schema').split(',')]
|
|
47
|
+
batching = Config().get('cql.alter-tables.batching', True)
|
|
48
|
+
tables = get_tables(state, on_any=True)
|
|
49
|
+
for k, v in tables.items():
|
|
50
|
+
if k not in excludes or k == 'reaper_db' and include_reaper:
|
|
51
|
+
if batching:
|
|
64
52
|
# alter table <table_name> with GC_GRACE_SECONDS = <timeout>;
|
|
65
|
-
cql = f'alter table {k}.{t} with {arg_str}'
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
53
|
+
cql = ';\n'.join([f'alter table {k}.{t} with {arg_str}' for t in v])
|
|
54
|
+
with log_exc(True):
|
|
55
|
+
with cassandra(state) as pods:
|
|
56
|
+
pods.cql(cql, show_out=Config().is_debug(), show_query=not Config().is_debug(), on_any=True)
|
|
57
|
+
continue
|
|
58
|
+
else:
|
|
59
|
+
for t in v:
|
|
60
|
+
with log_exc(True):
|
|
61
|
+
# alter table <table_name> with GC_GRACE_SECONDS = <timeout>;
|
|
62
|
+
cql = f'alter table {k}.{t} with {arg_str}'
|
|
63
|
+
with cassandra(state) as pods:
|
|
64
|
+
pods.cql(show_out=Config().is_debug(), show_query=not Config().is_debug(), on_any=True)
|
|
65
|
+
continue
|
|
66
|
+
|
|
67
|
+
log2(f'{len(v)} tables altered in {k}.')
|
|
68
|
+
|
|
69
|
+
# do not continue to cql route
|
|
70
|
+
return state
|
|
75
71
|
|
|
76
72
|
def completion(self, _: ReplState) -> dict[str, any]:
|
|
77
73
|
# auto completion is taken care of by sql completer
|
adam/commands/audit/audit.py
CHANGED
|
@@ -7,14 +7,14 @@ from adam.commands.audit.show_slow10 import ShowSlow10
|
|
|
7
7
|
from adam.commands.audit.show_top10 import ShowTop10
|
|
8
8
|
from adam.commands.audit.utils_show_top10 import show_top10_completions_for_nesting
|
|
9
9
|
from adam.commands.command import Command
|
|
10
|
+
from adam.commands.intermediate_command import IntermediateCommand
|
|
10
11
|
from adam.config import Config
|
|
11
12
|
from adam.repl_state import ReplState
|
|
12
|
-
from adam.sql.sql_completer import SqlCompleter
|
|
13
|
-
from adam.utils import log2
|
|
13
|
+
from adam.sql.sql_completer import SqlCompleter, SqlVariant
|
|
14
|
+
from adam.utils import log2, wait_log
|
|
14
15
|
from adam.utils_athena import Athena
|
|
15
|
-
from adam.utils_audits import Audits
|
|
16
16
|
|
|
17
|
-
class Audit(
|
|
17
|
+
class Audit(IntermediateCommand):
|
|
18
18
|
COMMAND = 'audit'
|
|
19
19
|
|
|
20
20
|
# the singleton pattern
|
|
@@ -37,29 +37,26 @@ class Audit(Command):
|
|
|
37
37
|
if not(args := self.args(cmd)):
|
|
38
38
|
return super().run(cmd, state)
|
|
39
39
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
r = None
|
|
45
|
-
if len(args) > 0:
|
|
46
|
-
r = super().intermediate_run(cmd, state, args, Audit.cmd_list(), display_help=False)
|
|
40
|
+
with self.validate(args, state) as (args, state):
|
|
41
|
+
r = None
|
|
42
|
+
if len(args) > 0:
|
|
43
|
+
r = self.intermediate_run(cmd, state, args, self.cmd_list(), display_help=False)
|
|
47
44
|
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
45
|
+
if not r or isinstance(r, str) and r == 'command-missing':
|
|
46
|
+
sql = 'select * from audit order by ts desc limit 10'
|
|
47
|
+
if args:
|
|
48
|
+
sql = ' '.join(args)
|
|
49
|
+
else:
|
|
50
|
+
log2(sql)
|
|
54
51
|
|
|
55
|
-
|
|
52
|
+
Athena.run_query(sql)
|
|
56
53
|
|
|
57
|
-
|
|
54
|
+
return state
|
|
58
55
|
|
|
59
56
|
def completion(self, state: ReplState):
|
|
60
57
|
if state.device == ReplState.L:
|
|
61
58
|
if not self.schema_read:
|
|
62
|
-
|
|
59
|
+
wait_log(f'Inspecting audit database schema...')
|
|
63
60
|
self.schema_read = True
|
|
64
61
|
# warm up the caches first time when l: drive is accessed
|
|
65
62
|
Athena.table_names()
|
|
@@ -68,14 +65,16 @@ class Audit(Command):
|
|
|
68
65
|
|
|
69
66
|
return super().completion(state) | show_top10_completions_for_nesting() | SqlCompleter(
|
|
70
67
|
lambda: Athena.table_names(),
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
68
|
+
expandables={
|
|
69
|
+
'columns': lambda table: Athena.column_names(),
|
|
70
|
+
'partition_columns': lambda table: Athena.column_names(partition_cols_only=True)
|
|
71
|
+
},
|
|
72
|
+
variant=SqlVariant.ATHENA
|
|
74
73
|
).completions_for_nesting()
|
|
75
74
|
|
|
76
75
|
return {}
|
|
77
76
|
|
|
78
|
-
def cmd_list():
|
|
77
|
+
def cmd_list(self):
|
|
79
78
|
return [AuditRepairTables(), AuditRun(), ShowLast10(), ShowSlow10(), ShowTop10()]
|
|
80
79
|
|
|
81
80
|
def help(self, _: ReplState):
|
|
@@ -83,4 +82,4 @@ class Audit(Command):
|
|
|
83
82
|
|
|
84
83
|
class AuditCommandHelper(click.Command):
|
|
85
84
|
def get_help(self, ctx: click.Context):
|
|
86
|
-
|
|
85
|
+
IntermediateCommand.intermediate_help(super().get_help(ctx), Audit.COMMAND, Audit().cmd_list(), show_cluster_help=False)
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import concurrent
|
|
2
1
|
import time
|
|
3
2
|
|
|
4
3
|
from adam.commands.command import Command
|
|
@@ -31,25 +30,23 @@ class AuditRepairTables(Command):
|
|
|
31
30
|
if not(args := self.args(cmd)):
|
|
32
31
|
return super().run(cmd, state)
|
|
33
32
|
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
tables = Config().get('audit.athena.repair-partition-tables', 'audit').split(',')
|
|
39
|
-
if args:
|
|
40
|
-
tables = args
|
|
33
|
+
with self.validate(args, state) as (args, state):
|
|
34
|
+
tables = Config().get('audit.athena.repair-partition-tables', 'audit').split(',')
|
|
35
|
+
if args:
|
|
36
|
+
tables = args
|
|
41
37
|
|
|
42
|
-
|
|
43
|
-
|
|
38
|
+
meta = Audits.get_meta()
|
|
39
|
+
self.repair(tables, meta)
|
|
44
40
|
|
|
45
|
-
|
|
41
|
+
return state
|
|
46
42
|
|
|
47
43
|
def completion(self, state: ReplState):
|
|
44
|
+
# trigger auto repair if on l: drive
|
|
48
45
|
if state.device == ReplState.L:
|
|
49
46
|
if not self.auto_repaired:
|
|
50
47
|
if hours := Config().get('audit.athena.auto-repair.elapsed_hours', 12):
|
|
51
|
-
with
|
|
52
|
-
|
|
48
|
+
with Audits.offload() as exec:
|
|
49
|
+
exec.submit(lambda: self.auto_repair(hours))
|
|
53
50
|
|
|
54
51
|
return super().completion(state)
|
|
55
52
|
|
|
@@ -65,13 +62,13 @@ class AuditRepairTables(Command):
|
|
|
65
62
|
log2(f'Audit tables have been auto-repaired.')
|
|
66
63
|
|
|
67
64
|
def repair(self, tables: list[str], meta: AuditMeta, show_sql = False):
|
|
68
|
-
with
|
|
65
|
+
with Audits.offload() as exec:
|
|
69
66
|
for table in tables:
|
|
70
67
|
if show_sql:
|
|
71
68
|
log(f'MSCK REPAIR TABLE {table}')
|
|
72
69
|
|
|
73
|
-
|
|
74
|
-
|
|
70
|
+
exec.submit(Athena.query, f'MSCK REPAIR TABLE {table}', None,)
|
|
71
|
+
exec.submit(Audits.put_meta, Audits.PARTITIONS_ADDED, meta,)
|
|
75
72
|
|
|
76
73
|
def help(self, _: ReplState):
|
|
77
|
-
return f"{AuditRepairTables.COMMAND}
|
|
74
|
+
return f"{AuditRepairTables.COMMAND}\t run MSCK REPAIR command for new partition discovery"
|