kaqing 2.0.104__py3-none-any.whl → 2.0.105__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kaqing might be problematic. Click here for more details.
- adam/commands/audit/audit.py +8 -8
- adam/commands/audit/audit_repair_tables.py +6 -7
- adam/commands/audit/audit_run.py +8 -6
- adam/commands/ls.py +2 -2
- adam/commands/preview_table.py +3 -3
- adam/repl_commands.py +2 -2
- adam/utils_audits.py +167 -0
- adam/version.py +1 -1
- {kaqing-2.0.104.dist-info → kaqing-2.0.105.dist-info}/METADATA +1 -1
- {kaqing-2.0.104.dist-info → kaqing-2.0.105.dist-info}/RECORD +13 -13
- adam/utils_athena.py +0 -162
- {kaqing-2.0.104.dist-info → kaqing-2.0.105.dist-info}/WHEEL +0 -0
- {kaqing-2.0.104.dist-info → kaqing-2.0.105.dist-info}/entry_points.txt +0 -0
- {kaqing-2.0.104.dist-info → kaqing-2.0.105.dist-info}/top_level.txt +0 -0
adam/commands/audit/audit.py
CHANGED
|
@@ -7,7 +7,7 @@ from adam.config import Config
|
|
|
7
7
|
from adam.repl_state import ReplState
|
|
8
8
|
from adam.sql.sql_completer import SqlCompleter
|
|
9
9
|
from adam.utils import log2
|
|
10
|
-
from adam.
|
|
10
|
+
from adam.utils_audits import Audits
|
|
11
11
|
|
|
12
12
|
class Audit(Command):
|
|
13
13
|
COMMAND = 'audit'
|
|
@@ -42,7 +42,7 @@ class Audit(Command):
|
|
|
42
42
|
else:
|
|
43
43
|
log2(sql)
|
|
44
44
|
|
|
45
|
-
run_audit_query(sql)
|
|
45
|
+
Audits.run_audit_query(sql)
|
|
46
46
|
|
|
47
47
|
return state
|
|
48
48
|
|
|
@@ -52,13 +52,13 @@ class Audit(Command):
|
|
|
52
52
|
Config().wait_log(f'Inspecting audit database schema...')
|
|
53
53
|
self.schema_read = True
|
|
54
54
|
# warm up the caches first time when l: drive is accessed
|
|
55
|
-
audit_column_names()
|
|
56
|
-
audit_column_names(partition_cols_only=True)
|
|
55
|
+
Audits.audit_column_names()
|
|
56
|
+
Audits.audit_column_names(partition_cols_only=True)
|
|
57
57
|
|
|
58
58
|
return super().completion(state) | SqlCompleter(
|
|
59
|
-
lambda: audit_table_names(),
|
|
60
|
-
columns=lambda table: audit_column_names(),
|
|
61
|
-
partition_columns=lambda table: audit_column_names(partition_cols_only=True),
|
|
59
|
+
lambda: Audits.audit_table_names(),
|
|
60
|
+
columns=lambda table: Audits.audit_column_names(),
|
|
61
|
+
partition_columns=lambda table: Audits.audit_column_names(partition_cols_only=True),
|
|
62
62
|
variant='athena'
|
|
63
63
|
).completions_for_nesting()
|
|
64
64
|
|
|
@@ -68,7 +68,7 @@ class Audit(Command):
|
|
|
68
68
|
return [AuditRepairTables(), AuditRun()]
|
|
69
69
|
|
|
70
70
|
def help(self, _: ReplState):
|
|
71
|
-
return f'[{Audit.COMMAND}] <sql-statements
|
|
71
|
+
return f'[{Audit.COMMAND}] [<sql-statements>]\t run SQL queries on Authena audit database'
|
|
72
72
|
|
|
73
73
|
class AuditCommandHelper(click.Command):
|
|
74
74
|
def get_help(self, ctx: click.Context):
|
|
adam/commands/audit/audit_repair_tables.py
CHANGED
|
@@ -1,12 +1,11 @@
|
|
|
1
1
|
import concurrent
|
|
2
2
|
import time
|
|
3
|
-
import requests
|
|
4
3
|
|
|
5
4
|
from adam.commands.command import Command
|
|
6
5
|
from adam.config import Config
|
|
7
6
|
from adam.repl_state import ReplState
|
|
8
7
|
from adam.utils import log, log2
|
|
9
|
-
from adam.
|
|
8
|
+
from adam.utils_audits import AuditMeta, Audits
|
|
10
9
|
|
|
11
10
|
class AuditRepairTables(Command):
|
|
12
11
|
COMMAND = 'audit repair'
|
|
@@ -34,7 +33,7 @@ class AuditRepairTables(Command):
|
|
|
34
33
|
if args:
|
|
35
34
|
tables = args
|
|
36
35
|
|
|
37
|
-
meta = get_meta()
|
|
36
|
+
meta = Audits.get_meta()
|
|
38
37
|
self.repair(tables, meta)
|
|
39
38
|
|
|
40
39
|
return state
|
|
@@ -53,8 +52,8 @@ class AuditRepairTables(Command):
|
|
|
53
52
|
def auto_repair(self, hours: int):
|
|
54
53
|
self.auto_repaired = True
|
|
55
54
|
|
|
56
|
-
meta = get_meta()
|
|
57
|
-
if meta.
|
|
55
|
+
meta: AuditMeta = Audits.get_meta()
|
|
56
|
+
if meta.partitions_last_checked + hours * 60 * 60 < time.time():
|
|
58
57
|
tables = Config().get('audit.athena.repair-partition-tables', 'audit').split(',')
|
|
59
58
|
self.repair(tables, meta, show_sql=True)
|
|
60
59
|
log2(f'Audit tables have been auto-repaired.')
|
|
@@ -65,8 +64,8 @@ class AuditRepairTables(Command):
|
|
|
65
64
|
if show_sql:
|
|
66
65
|
log(f'MSCK REPAIR TABLE {table}')
|
|
67
66
|
|
|
68
|
-
executor.submit(run_audit_query, f'MSCK REPAIR TABLE {table}', None,)
|
|
69
|
-
executor.submit(put_meta,
|
|
67
|
+
executor.submit(Audits.run_audit_query, f'MSCK REPAIR TABLE {table}', None,)
|
|
68
|
+
executor.submit(Audits.put_meta, Audits.PARTITIONS_ADDED, meta,)
|
|
70
69
|
|
|
71
70
|
def help(self, _: ReplState):
|
|
72
71
|
return f"{AuditRepairTables.COMMAND} \t run MSCK REPAIR command for new partition discovery"
|
adam/commands/audit/audit_run.py
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
|
+
import concurrent
|
|
1
2
|
from adam.commands.command import Command
|
|
2
3
|
from adam.config import Config
|
|
3
4
|
from adam.repl_state import ReplState
|
|
4
5
|
from adam.utils import log2
|
|
5
|
-
from adam.
|
|
6
|
+
from adam.utils_audits import AuditMeta, Audits
|
|
6
7
|
|
|
7
8
|
class AuditRun(Command):
|
|
8
9
|
COMMAND = 'audit run'
|
|
@@ -26,14 +27,15 @@ class AuditRun(Command):
|
|
|
26
27
|
|
|
27
28
|
state, args = self.apply_state(args, state)
|
|
28
29
|
|
|
29
|
-
meta: AuditMeta = get_meta()
|
|
30
|
-
clusters = find_new_clusters(meta.cluster_last_checked)
|
|
30
|
+
meta: AuditMeta = Audits.get_meta()
|
|
31
|
+
clusters = Audits.find_new_clusters(meta.cluster_last_checked)
|
|
32
|
+
Audits.put_meta(Audits.ADD_CLUSTERS, meta, clusters=clusters)
|
|
31
33
|
if clusters:
|
|
32
|
-
put_meta('add-clusters', meta, clusters=clusters)
|
|
33
34
|
log2(f'Added {len(clusters)} new clusters.')
|
|
34
35
|
tables = Config().get('audit.athena.repair-cluster-tables', 'cluster').split(',')
|
|
35
|
-
|
|
36
|
-
|
|
36
|
+
with concurrent.futures.ThreadPoolExecutor(max_workers=Config().get('audit.workers', 3)) as executor:
|
|
37
|
+
for table in tables:
|
|
38
|
+
Audits.run_audit_query(f'MSCK REPAIR TABLE {table}')
|
|
37
39
|
else:
|
|
38
40
|
log2(f'No new clusters were found.')
|
|
39
41
|
|
adam/commands/ls.py
CHANGED
|
@@ -14,7 +14,7 @@ from adam.pod_exec_result import PodExecResult
|
|
|
14
14
|
from adam.repl_state import ReplState
|
|
15
15
|
from adam.utils import lines_to_tabular, log, log2
|
|
16
16
|
from adam.apps import Apps
|
|
17
|
-
from adam.
|
|
17
|
+
from adam.utils_audits import Audits
|
|
18
18
|
|
|
19
19
|
class Ls(Command):
|
|
20
20
|
COMMAND = 'ls'
|
|
@@ -131,7 +131,7 @@ class Ls(Command):
|
|
|
131
131
|
log(lines_to_tabular(pg_table_names(pg.namespace, pg.path()), 'NAME', separator=','))
|
|
132
132
|
|
|
133
133
|
def show_audit_log_tables(self):
|
|
134
|
-
log(lines_to_tabular(audit_table_names(), 'NAME', separator=','))
|
|
134
|
+
log(lines_to_tabular(Audits.audit_table_names(), 'NAME', separator=','))
|
|
135
135
|
|
|
136
136
|
def completion(self, state: ReplState):
|
|
137
137
|
if state.pod:
|
adam/commands/preview_table.py
CHANGED
|
@@ -4,7 +4,7 @@ from adam.commands.postgres.postgres_context import PostgresContext
|
|
|
4
4
|
from adam.config import Config
|
|
5
5
|
from adam.repl_state import ReplState, RequiredState
|
|
6
6
|
from adam.utils import lines_to_tabular, log, log2
|
|
7
|
-
from adam.
|
|
7
|
+
from adam.utils_audits import Audits
|
|
8
8
|
|
|
9
9
|
class PreviewTable(Command):
|
|
10
10
|
COMMAND = 'preview'
|
|
@@ -43,7 +43,7 @@ class PreviewTable(Command):
|
|
|
43
43
|
lines = [db["name"] for db in pg.tables() if db["schema"] == PostgresContext.default_schema()]
|
|
44
44
|
log(lines_to_tabular(lines, separator=','))
|
|
45
45
|
elif state.device == ReplState.L:
|
|
46
|
-
log(lines_to_tabular(audit_table_names(), separator=','))
|
|
46
|
+
log(lines_to_tabular(Audits.audit_table_names(), separator=','))
|
|
47
47
|
else:
|
|
48
48
|
log(lines_to_tabular(cassandra_table_names(state), separator=','))
|
|
49
49
|
|
|
@@ -66,7 +66,7 @@ class PreviewTable(Command):
|
|
|
66
66
|
if state.device == ReplState.P:
|
|
67
67
|
PostgresContext.apply(state.namespace, state.pg_path).run_sql(f'select * from {table} limit {rows}')
|
|
68
68
|
elif state.device == ReplState.L:
|
|
69
|
-
run_audit_query(f'select * from {table} limit {rows}')
|
|
69
|
+
Audits.run_audit_query(f'select * from {table} limit {rows}')
|
|
70
70
|
else:
|
|
71
71
|
run_cql(state, f'select * from {table} limit {rows}', show_out=True, use_single_quotes=True, on_any=True)
|
|
72
72
|
|
adam/repl_commands.py
CHANGED
|
@@ -58,7 +58,7 @@ class ReplCommands:
|
|
|
58
58
|
cmds: list[Command] = ReplCommands.navigation() + ReplCommands.cassandra_check() + ReplCommands.cassandra_ops() + \
|
|
59
59
|
ReplCommands.tools() + ReplCommands.app() + ReplCommands.exit()
|
|
60
60
|
|
|
61
|
-
intermediate_cmds: list[Command] = [App(), Reaper(), Repair(), Deploy(), Show(), Undeploy()]
|
|
61
|
+
intermediate_cmds: list[Command] = [App(), Audit(), Reaper(), Repair(), Deploy(), Show(), Undeploy()]
|
|
62
62
|
ic = [c.command() for c in intermediate_cmds]
|
|
63
63
|
# 1. dedup commands
|
|
64
64
|
deduped = []
|
|
@@ -86,7 +86,7 @@ class ReplCommands:
|
|
|
86
86
|
|
|
87
87
|
def tools() -> list[Command]:
|
|
88
88
|
return [Cqlsh(), Postgres(), Bash(), Shell(), CodeStart(), CodeStop(), DeployFrontend(), UndeployFrontend(),
|
|
89
|
-
DeployPod(), UndeployPod(), DeployPgAgent(), UndeployPgAgent(),
|
|
89
|
+
DeployPod(), UndeployPod(), DeployPgAgent(), UndeployPgAgent(), Audit()] + Audit.cmd_list()
|
|
90
90
|
|
|
91
91
|
def app() -> list[Command]:
|
|
92
92
|
return [ShowAppActions(), ShowAppId(), ShowAppQueues(), AppPing(), App()]
|
adam/utils_audits.py
ADDED
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
from datetime import datetime
|
|
2
|
+
import functools
|
|
3
|
+
import time
|
|
4
|
+
import boto3
|
|
5
|
+
import requests
|
|
6
|
+
|
|
7
|
+
from adam.config import Config
|
|
8
|
+
from adam.utils import lines_to_tabular, log, log2
|
|
9
|
+
|
|
10
|
+
class AuditMeta:
|
|
11
|
+
def __init__(self, partitions_last_checked: float, cluster_last_checked: float):
|
|
12
|
+
self.partitions_last_checked = partitions_last_checked
|
|
13
|
+
self.cluster_last_checked = cluster_last_checked
|
|
14
|
+
|
|
15
|
+
# no state utility class
|
|
16
|
+
class Audits:
|
|
17
|
+
PARTITIONS_ADDED = 'partitions-added'
|
|
18
|
+
ADD_CLUSTERS = 'add-clusters'
|
|
19
|
+
|
|
20
|
+
def get_meta() -> AuditMeta:
|
|
21
|
+
checked_in = 0.0
|
|
22
|
+
cluster_last_checked = 0.0
|
|
23
|
+
|
|
24
|
+
state, _, rs = Audits.audit_query(f'select partitions_last_checked, clusters_last_checked from meta')
|
|
25
|
+
if state == 'SUCCEEDED':
|
|
26
|
+
if len(rs) > 1:
|
|
27
|
+
try:
|
|
28
|
+
row = rs[1]['Data']
|
|
29
|
+
checked_in = float(row[0]['VarCharValue'])
|
|
30
|
+
cluster_last_checked = float(row[1]['VarCharValue'])
|
|
31
|
+
except:
|
|
32
|
+
pass
|
|
33
|
+
|
|
34
|
+
return AuditMeta(checked_in, cluster_last_checked)
|
|
35
|
+
|
|
36
|
+
def find_new_clusters(cluster_last_checked: float) -> list[str]:
|
|
37
|
+
dt_object = datetime.fromtimestamp(cluster_last_checked)
|
|
38
|
+
|
|
39
|
+
y = dt_object.strftime("%Y")
|
|
40
|
+
m = dt_object.strftime("%m")
|
|
41
|
+
d = dt_object.strftime("%d")
|
|
42
|
+
# select distinct c2.name from cluster as c1 right outer join
|
|
43
|
+
# (select distinct c as name from audit where y = '1969' and m = '12' and d >= '31' or y = '1969' and m > '12' or y > '1969') as c2
|
|
44
|
+
# on c1.name = c2.name where c1.name is null
|
|
45
|
+
where = f"y = '{y}' and m = '{m}' and d >= '{d}' or y = '{y}' and m > '{m}' or y > '{y}'"
|
|
46
|
+
query = '\n '.join([
|
|
47
|
+
'select distinct c2.name from cluster as c1 right outer join',
|
|
48
|
+
f'(select distinct c as name from audit where {where}) as c2',
|
|
49
|
+
'on c1.name = c2.name where c1.name is null'])
|
|
50
|
+
log2(query)
|
|
51
|
+
state, _, rs = Audits.audit_query(query)
|
|
52
|
+
if state == 'SUCCEEDED':
|
|
53
|
+
if len(rs) > 1:
|
|
54
|
+
try:
|
|
55
|
+
return [r['Data'][0]['VarCharValue'] for r in rs[1:]]
|
|
56
|
+
except:
|
|
57
|
+
pass
|
|
58
|
+
|
|
59
|
+
return []
|
|
60
|
+
|
|
61
|
+
def put_meta(action: str, meta: AuditMeta, clusters: list[str] = None):
|
|
62
|
+
payload = {
|
|
63
|
+
'action': action,
|
|
64
|
+
'partitions-last-checked': meta.partitions_last_checked,
|
|
65
|
+
'clusters-last-checked': meta.cluster_last_checked
|
|
66
|
+
}
|
|
67
|
+
if clusters:
|
|
68
|
+
payload['clusters'] = clusters
|
|
69
|
+
|
|
70
|
+
audit_endpoint = Config().get("audit.endpoint", "https://4psvtaxlcb.execute-api.us-west-2.amazonaws.com/prod/")
|
|
71
|
+
try:
|
|
72
|
+
response = requests.post(audit_endpoint, json=payload, timeout=Config().get("audit.timeout", 10))
|
|
73
|
+
if response.status_code in [200, 201]:
|
|
74
|
+
Config().debug(response.text)
|
|
75
|
+
else:
|
|
76
|
+
log2(f"Error: {response.status_code} {response.text}")
|
|
77
|
+
except requests.exceptions.Timeout as e:
|
|
78
|
+
log2(f"Timeout occurred: {e}")
|
|
79
|
+
|
|
80
|
+
@functools.lru_cache()
|
|
81
|
+
def audit_table_names():
|
|
82
|
+
region_name = Config().get('audit.athena.region', 'us-west-2')
|
|
83
|
+
database_name = Config().get('audit.athena.database', 'audit')
|
|
84
|
+
catalog_name = Config().get('audit.athena.catalog', 'AwsDataCatalog')
|
|
85
|
+
|
|
86
|
+
athena_client = boto3.client('athena', region_name=region_name)
|
|
87
|
+
paginator = athena_client.get_paginator('list_table_metadata')
|
|
88
|
+
|
|
89
|
+
table_names = []
|
|
90
|
+
for page in paginator.paginate(CatalogName=catalog_name, DatabaseName=database_name):
|
|
91
|
+
for table_metadata in page.get('TableMetadataList', []):
|
|
92
|
+
table_names.append(table_metadata['Name'])
|
|
93
|
+
|
|
94
|
+
return table_names
|
|
95
|
+
|
|
96
|
+
@functools.lru_cache()
|
|
97
|
+
def audit_column_names(tables: list[str] = [], database: str = None, partition_cols_only = False):
|
|
98
|
+
if not database:
|
|
99
|
+
database = Config().get('audit.athena.database', 'audit')
|
|
100
|
+
|
|
101
|
+
if not tables:
|
|
102
|
+
tables = Config().get('audit.athena.tables', 'audit').split(',')
|
|
103
|
+
|
|
104
|
+
table_names = "'" + "','".join([table.strip() for table in tables]) + "'"
|
|
105
|
+
|
|
106
|
+
query = f"select column_name from information_schema.columns where table_name in ({table_names}) and table_schema = '{database}'"
|
|
107
|
+
if partition_cols_only:
|
|
108
|
+
query = f"{query} and extra_info = 'partition key'"
|
|
109
|
+
|
|
110
|
+
_, _, rs = Audits.audit_query(query)
|
|
111
|
+
if rs:
|
|
112
|
+
return [row['Data'][0].get('VarCharValue') for row in rs[1:]]
|
|
113
|
+
|
|
114
|
+
return []
|
|
115
|
+
|
|
116
|
+
def run_audit_query(sql: str, database: str = None):
|
|
117
|
+
state, reason, rs = Audits.audit_query(sql, database)
|
|
118
|
+
|
|
119
|
+
if state == 'SUCCEEDED':
|
|
120
|
+
if rs:
|
|
121
|
+
column_info = rs[0]['Data']
|
|
122
|
+
columns = [col.get('VarCharValue') for col in column_info]
|
|
123
|
+
lines = []
|
|
124
|
+
for row in rs[1:]:
|
|
125
|
+
row_data = [col.get('VarCharValue') if col else '' for col in row['Data']]
|
|
126
|
+
lines.append('\t'.join(row_data))
|
|
127
|
+
|
|
128
|
+
log(lines_to_tabular(lines, header='\t'.join(columns), separator='\t'))
|
|
129
|
+
else:
|
|
130
|
+
log2(f"Query failed or was cancelled. State: {state}")
|
|
131
|
+
log2(f"Reason: {reason}")
|
|
132
|
+
|
|
133
|
+
def audit_query(sql: str, database: str = None) -> tuple[str, str, list]:
|
|
134
|
+
athena_client = boto3.client('athena')
|
|
135
|
+
|
|
136
|
+
if not database:
|
|
137
|
+
database = Config().get('audit.athena.database', 'audit')
|
|
138
|
+
|
|
139
|
+
s3_output_location = Config().get('audit.athena.output', 's3://s3.ops--audit/ddl/results')
|
|
140
|
+
|
|
141
|
+
response = athena_client.start_query_execution(
|
|
142
|
+
QueryString=sql,
|
|
143
|
+
QueryExecutionContext={
|
|
144
|
+
'Database': database
|
|
145
|
+
},
|
|
146
|
+
ResultConfiguration={
|
|
147
|
+
'OutputLocation': s3_output_location
|
|
148
|
+
}
|
|
149
|
+
)
|
|
150
|
+
|
|
151
|
+
query_execution_id = response['QueryExecutionId']
|
|
152
|
+
|
|
153
|
+
while True:
|
|
154
|
+
query_status = athena_client.get_query_execution(QueryExecutionId=query_execution_id)
|
|
155
|
+
state = query_status['QueryExecution']['Status']['State']
|
|
156
|
+
if state in ['SUCCEEDED', 'FAILED', 'CANCELLED']:
|
|
157
|
+
break
|
|
158
|
+
time.sleep(1)
|
|
159
|
+
|
|
160
|
+
if state == 'SUCCEEDED':
|
|
161
|
+
results_response = athena_client.get_query_results(QueryExecutionId=query_execution_id)
|
|
162
|
+
if results_response['ResultSet']['Rows']:
|
|
163
|
+
return (state, None, results_response['ResultSet']['Rows'])
|
|
164
|
+
|
|
165
|
+
return (state, None, [])
|
|
166
|
+
else:
|
|
167
|
+
return (state, query_status['QueryExecution']['Status'].get('StateChangeReason'), [])
|
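adam/version.py
CHANGED
(hunk not captured in this extract; +1 -1 per the file summary above)
{kaqing-2.0.104.dist-info → kaqing-2.0.105.dist-info}/METADATA
CHANGED
(hunk not captured in this extract; +1 -1 per the file summary above)
{kaqing-2.0.104.dist-info → kaqing-2.0.105.dist-info}/RECORD
CHANGED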
|
@@ -10,13 +10,13 @@ adam/embedded_params.py,sha256=YeyY3QTLBtjiMsBt16A7z8lvJ_hlVtAPJR6RbjStIwI,5060
|
|
|
10
10
|
adam/log.py,sha256=gg5DK52wLPc9cjykeh0WFHyAk1qI3HEpGaAK8W2dzXY,1146
|
|
11
11
|
adam/pod_exec_result.py,sha256=WBXJSvxzXp9TfsfXeHtIvgz8GvfMAAcH5M03GISLqzw,1046
|
|
12
12
|
adam/repl.py,sha256=ZfYEZ6s1Hhw-aIFZk358bb7KCAJ-aTDxSPJvfd2ciHQ,10904
|
|
13
|
-
adam/repl_commands.py,sha256=
|
|
13
|
+
adam/repl_commands.py,sha256=iKcw208Iqs4jxJYX2pCT4woGijpcg0IM58tVlhZQNo8,4646
|
|
14
14
|
adam/repl_session.py,sha256=uIogcvWBh7wd8QQ-p_JgLsyJ8YJgINw5vOd6JIsd7Vo,472
|
|
15
15
|
adam/repl_state.py,sha256=GuDq3C42hMX3pmjLlMbARJ8nGiBw8-gNT-xhkKQu-a4,8797
|
|
16
16
|
adam/utils.py,sha256=sbsNZP3qGJtb6fXCa4dDXHry5ay9ev583cCZIQzy07s,7382
|
|
17
|
-
adam/
|
|
17
|
+
adam/utils_audits.py,sha256=kPw6B0lw2q4PmYdk_k16MX_V-ONnrhJX3T-cMTFYZdM,6376
|
|
18
18
|
adam/utils_net.py,sha256=65fhBnWMCkhGtyHqz95qcHaCo35q-WX1RBkkXG8dKpI,416
|
|
19
|
-
adam/version.py,sha256=
|
|
19
|
+
adam/version.py,sha256=W2f85EjavUMXa9WADSC-bmOjXgpifTdBQ8mFdwXyUIA,140
|
|
20
20
|
adam/checks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
21
21
|
adam/checks/check.py,sha256=Qopr3huYcMu2bzQgb99dEUYjFzkjKHRI76S6KA9b9Rk,702
|
|
22
22
|
adam/checks/check_context.py,sha256=FEHkQ32jY1EDopQ2uYWqy9v7aEEX1orLpJWhopwAlh4,402
|
|
@@ -66,12 +66,12 @@ adam/commands/help.py,sha256=4IzR4p8UiXr00o1TaymHWm8957EWbWRyuvhrJzZzvc0,1734
|
|
|
66
66
|
adam/commands/issues.py,sha256=VS-PC7e-2lywsa-lbmoUX8IY77OPGzFudwbw1g8XmQc,2599
|
|
67
67
|
adam/commands/login.py,sha256=bj95WWIF7mJDJhnyS9T8xvaZUGL37dj7GlH8TgmODbk,1877
|
|
68
68
|
adam/commands/logs.py,sha256=GBVztFlCQfd4jfMtqydPjWS9xsB5mV4Aj4ohSQFm6i0,1165
|
|
69
|
-
adam/commands/ls.py,sha256=
|
|
69
|
+
adam/commands/ls.py,sha256=49bS2V6jL_HAu6Y9kJzf8e3v4krxn_wNXUf47_R3Tkw,5805
|
|
70
70
|
adam/commands/nodetool.py,sha256=k4Gr63slw0BGinbbj3nHue2GHf55cjAjTdt636UKjb8,2363
|
|
71
71
|
adam/commands/nodetool_commands.py,sha256=5IgWC3rmeDD1cgwqQjiiWzi-wJpJ3n_8pAzz_9phXuk,2635
|
|
72
72
|
adam/commands/param_get.py,sha256=kPAAppK2T0tEFRnSIVFLDPIIGHhgLA7drJhn8TRyvvE,1305
|
|
73
73
|
adam/commands/param_set.py,sha256=QDIuqfU80aWCB16OK49yf7XRaRTWwiLkwMsJuVikq9I,1271
|
|
74
|
-
adam/commands/preview_table.py,sha256=
|
|
74
|
+
adam/commands/preview_table.py,sha256=mz9Cstk_UfNNi3rBye4kZ4M0mtjeyfRCUtqP5zifvDc,2843
|
|
75
75
|
adam/commands/pwd.py,sha256=AvM1gMgxBfIupevtYkGmNWGyEcsIx7-S8L9_v_DPEgk,2469
|
|
76
76
|
adam/commands/report.py,sha256=Ky45LIzSlB_X4V12JZWjU3SA2u4_FKRencRTq7psOWU,1944
|
|
77
77
|
adam/commands/restart.py,sha256=SAxWHvglTckQJ0tJe5t-HWsVerbreNMM-7Nb9PAqno4,2044
|
|
@@ -79,9 +79,9 @@ adam/commands/rollout.py,sha256=Db9P4Owd3aPcRLIGhwyEElBNm_2Ke54KbiXyVKmztcE,2959
|
|
|
79
79
|
adam/commands/shell.py,sha256=wY_PIx7Lt6vuxhFArlfxdEnBbrouCJ3yNHhFn17DEqw,848
|
|
80
80
|
adam/commands/watch.py,sha256=fU2LGll-Igl08HpUQALOnh8l3s3AMGFX26NCLhqbfcw,2438
|
|
81
81
|
adam/commands/audit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
82
|
-
adam/commands/audit/audit.py,sha256=
|
|
83
|
-
adam/commands/audit/audit_repair_tables.py,sha256=
|
|
84
|
-
adam/commands/audit/audit_run.py,sha256=
|
|
82
|
+
adam/commands/audit/audit.py,sha256=DcyolsPy9Qzan5x3lwomkmkwpAeAa9PHj8Y1Q2bwI70,2584
|
|
83
|
+
adam/commands/audit/audit_repair_tables.py,sha256=zNyhOK_-dbeK-7rH-b_HNeH5Qwrk_ccqakWKujVP7Lk,2549
|
|
84
|
+
adam/commands/audit/audit_run.py,sha256=Bfzv73aaYvHtRhr_mtzeH1muJtexhPfoUPENAiGKCgI,1699
|
|
85
85
|
adam/commands/cql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
86
86
|
adam/commands/cql/cql_completions.py,sha256=29TJmNHg2_EwGyBrgh04dtZl0BDxGEnLXf_hbPQF9no,660
|
|
87
87
|
adam/commands/cql/cql_utils.py,sha256=EWswF4JEj4EI_lIa1tqhNV-0sasf81SK64aNf8JPqZo,4103
|
|
@@ -176,8 +176,8 @@ adam/utils_k8s/service_accounts.py,sha256=v2oQSqCrNvt2uRnKlNwR3fjtpUG7oF5nqgzEB7
|
|
|
176
176
|
adam/utils_k8s/services.py,sha256=EOJJGACVbbRvu5T3rMKqIJqgYic1_MSJ17EA0TJ6UOk,3156
|
|
177
177
|
adam/utils_k8s/statefulsets.py,sha256=0J_cYRqH96PCcq3tdsRrs4Q4ewv5dT_FMBR0HGAJ3d8,4710
|
|
178
178
|
adam/utils_k8s/volumes.py,sha256=RIBmlOSWM3V3QVXLCFT0owVOyh4rGG1ETp521a-6ndo,1137
|
|
179
|
-
kaqing-2.0.
|
|
180
|
-
kaqing-2.0.
|
|
181
|
-
kaqing-2.0.
|
|
182
|
-
kaqing-2.0.
|
|
183
|
-
kaqing-2.0.
|
|
179
|
+
kaqing-2.0.105.dist-info/METADATA,sha256=-XxUVOCHMejB3MGXatLONCSDP3GKrOQE2MBoEXNp_U4,133
|
|
180
|
+
kaqing-2.0.105.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
|
|
181
|
+
kaqing-2.0.105.dist-info/entry_points.txt,sha256=SkzhuQJUWsXOzHeZ5TgQ2c3_g53UGK23zzJU_JTZOZI,39
|
|
182
|
+
kaqing-2.0.105.dist-info/top_level.txt,sha256=8_2PZkwBb-xDcnc8a2rAbQeJhXKXskc7zTP7pSPa1fw,5
|
|
183
|
+
kaqing-2.0.105.dist-info/RECORD,,
|
adam/utils_athena.py
DELETED
|
@@ -1,162 +0,0 @@
|
|
|
1
|
-
from datetime import datetime
|
|
2
|
-
import functools
|
|
3
|
-
import time
|
|
4
|
-
import boto3
|
|
5
|
-
import requests
|
|
6
|
-
|
|
7
|
-
from adam.config import Config
|
|
8
|
-
from adam.utils import lines_to_tabular, log, log2
|
|
9
|
-
|
|
10
|
-
class AuditMeta:
|
|
11
|
-
def __init__(self, checked_in: float, cluster_last_checked: float):
|
|
12
|
-
self.checked_in = checked_in
|
|
13
|
-
self.cluster_last_checked = cluster_last_checked
|
|
14
|
-
|
|
15
|
-
def get_meta() -> AuditMeta:
|
|
16
|
-
checked_in = 0.0
|
|
17
|
-
cluster_last_checked = 0.0
|
|
18
|
-
|
|
19
|
-
state, _, rs = audit_query(f'select partitions_last_checked, clusters_last_checked from meta')
|
|
20
|
-
if state == 'SUCCEEDED':
|
|
21
|
-
if len(rs) > 1:
|
|
22
|
-
try:
|
|
23
|
-
row = rs[1]['Data']
|
|
24
|
-
checked_in = float(row[0]['VarCharValue'])
|
|
25
|
-
cluster_last_checked = float(row[1]['VarCharValue'])
|
|
26
|
-
except:
|
|
27
|
-
pass
|
|
28
|
-
|
|
29
|
-
return AuditMeta(checked_in, cluster_last_checked)
|
|
30
|
-
|
|
31
|
-
def find_new_clusters(cluster_last_checked: float) -> list[str]:
|
|
32
|
-
dt_object = datetime.fromtimestamp(cluster_last_checked)
|
|
33
|
-
|
|
34
|
-
y = dt_object.strftime("%Y")
|
|
35
|
-
m = dt_object.strftime("%m")
|
|
36
|
-
d = dt_object.strftime("%d")
|
|
37
|
-
# select distinct c2.name from cluster as c1 right outer join
|
|
38
|
-
# (select distinct c as name from audit where y = '1969' and m = '12' and d >= '31' or y = '1969' and m > '12' or y > '1969') as c2
|
|
39
|
-
# on c1.name = c2.name where c1.name is null
|
|
40
|
-
where = f"y = '{y}' and m = '{m}' and d >= '{d}' or y = '{y}' and m > '{m}' or y > '{y}'"
|
|
41
|
-
query = '\n '.join([
|
|
42
|
-
'select distinct c2.name from cluster as c1 right outer join',
|
|
43
|
-
f'(select distinct c as name from audit where {where}) as c2',
|
|
44
|
-
'on c1.name = c2.name where c1.name is null'])
|
|
45
|
-
log2(query)
|
|
46
|
-
state, _, rs = audit_query(query)
|
|
47
|
-
if state == 'SUCCEEDED':
|
|
48
|
-
if len(rs) > 1:
|
|
49
|
-
try:
|
|
50
|
-
return [r['Data'][0]['VarCharValue'] for r in rs[1:]]
|
|
51
|
-
except:
|
|
52
|
-
pass
|
|
53
|
-
|
|
54
|
-
return []
|
|
55
|
-
|
|
56
|
-
def put_meta(action: str, meta: AuditMeta, clusters: list[str] = None):
|
|
57
|
-
payload = {
|
|
58
|
-
'action': action,
|
|
59
|
-
'partitions-last-checked': meta.checked_in,
|
|
60
|
-
'clusters-last-checked': meta.cluster_last_checked
|
|
61
|
-
}
|
|
62
|
-
if clusters:
|
|
63
|
-
payload['clusters'] = clusters
|
|
64
|
-
|
|
65
|
-
audit_endpoint = Config().get("audit.endpoint", "https://4psvtaxlcb.execute-api.us-west-2.amazonaws.com/prod/")
|
|
66
|
-
try:
|
|
67
|
-
response = requests.post(audit_endpoint, json=payload, timeout=Config().get("audit.timeout", 10))
|
|
68
|
-
if response.status_code in [200, 201]:
|
|
69
|
-
Config().debug(response.text)
|
|
70
|
-
else:
|
|
71
|
-
log2(f"Error: {response.status_code} {response.text}")
|
|
72
|
-
except requests.exceptions.Timeout as e:
|
|
73
|
-
log2(f"Timeout occurred: {e}")
|
|
74
|
-
|
|
75
|
-
@functools.lru_cache()
|
|
76
|
-
def audit_table_names():
|
|
77
|
-
region_name = Config().get('audit.athena.region', 'us-west-2')
|
|
78
|
-
database_name = Config().get('audit.athena.database', 'audit')
|
|
79
|
-
catalog_name = Config().get('audit.athena.catalog', 'AwsDataCatalog')
|
|
80
|
-
|
|
81
|
-
athena_client = boto3.client('athena', region_name=region_name)
|
|
82
|
-
paginator = athena_client.get_paginator('list_table_metadata')
|
|
83
|
-
|
|
84
|
-
table_names = []
|
|
85
|
-
for page in paginator.paginate(CatalogName=catalog_name, DatabaseName=database_name):
|
|
86
|
-
for table_metadata in page.get('TableMetadataList', []):
|
|
87
|
-
table_names.append(table_metadata['Name'])
|
|
88
|
-
|
|
89
|
-
return table_names
|
|
90
|
-
|
|
91
|
-
@functools.lru_cache()
|
|
92
|
-
def audit_column_names(tables: list[str] = [], database: str = None, partition_cols_only = False):
|
|
93
|
-
if not database:
|
|
94
|
-
database = Config().get('audit.athena.database', 'audit')
|
|
95
|
-
|
|
96
|
-
if not tables:
|
|
97
|
-
tables = Config().get('audit.athena.tables', 'audit').split(',')
|
|
98
|
-
|
|
99
|
-
table_names = "'" + "','".join([table.strip() for table in tables]) + "'"
|
|
100
|
-
|
|
101
|
-
query = f"select column_name from information_schema.columns where table_name in ({table_names}) and table_schema = '{database}'"
|
|
102
|
-
if partition_cols_only:
|
|
103
|
-
query = f"{query} and extra_info = 'partition key'"
|
|
104
|
-
|
|
105
|
-
_, _, rs = audit_query(query)
|
|
106
|
-
if rs:
|
|
107
|
-
return [row['Data'][0].get('VarCharValue') for row in rs[1:]]
|
|
108
|
-
|
|
109
|
-
return []
|
|
110
|
-
|
|
111
|
-
def run_audit_query(sql: str, database: str = None):
|
|
112
|
-
state, reason, rs = audit_query(sql, database)
|
|
113
|
-
|
|
114
|
-
if state == 'SUCCEEDED':
|
|
115
|
-
if rs:
|
|
116
|
-
column_info = rs[0]['Data']
|
|
117
|
-
columns = [col.get('VarCharValue') for col in column_info]
|
|
118
|
-
lines = []
|
|
119
|
-
for row in rs[1:]:
|
|
120
|
-
row_data = [col.get('VarCharValue') if col else '' for col in row['Data']]
|
|
121
|
-
lines.append('\t'.join(row_data))
|
|
122
|
-
|
|
123
|
-
log(lines_to_tabular(lines, header='\t'.join(columns), separator='\t'))
|
|
124
|
-
else:
|
|
125
|
-
log2(f"Query failed or was cancelled. State: {state}")
|
|
126
|
-
log2(f"Reason: {reason}")
|
|
127
|
-
|
|
128
|
-
def audit_query(sql: str, database: str = None) -> tuple[str, str, list]:
|
|
129
|
-
athena_client = boto3.client('athena')
|
|
130
|
-
|
|
131
|
-
if not database:
|
|
132
|
-
database = Config().get('audit.athena.database', 'audit')
|
|
133
|
-
|
|
134
|
-
s3_output_location = Config().get('audit.athena.output', 's3://s3.ops--audit/ddl/results')
|
|
135
|
-
|
|
136
|
-
response = athena_client.start_query_execution(
|
|
137
|
-
QueryString=sql,
|
|
138
|
-
QueryExecutionContext={
|
|
139
|
-
'Database': database
|
|
140
|
-
},
|
|
141
|
-
ResultConfiguration={
|
|
142
|
-
'OutputLocation': s3_output_location
|
|
143
|
-
}
|
|
144
|
-
)
|
|
145
|
-
|
|
146
|
-
query_execution_id = response['QueryExecutionId']
|
|
147
|
-
|
|
148
|
-
while True:
|
|
149
|
-
query_status = athena_client.get_query_execution(QueryExecutionId=query_execution_id)
|
|
150
|
-
state = query_status['QueryExecution']['Status']['State']
|
|
151
|
-
if state in ['SUCCEEDED', 'FAILED', 'CANCELLED']:
|
|
152
|
-
break
|
|
153
|
-
time.sleep(1)
|
|
154
|
-
|
|
155
|
-
if state == 'SUCCEEDED':
|
|
156
|
-
results_response = athena_client.get_query_results(QueryExecutionId=query_execution_id)
|
|
157
|
-
if results_response['ResultSet']['Rows']:
|
|
158
|
-
return (state, None, results_response['ResultSet']['Rows'])
|
|
159
|
-
|
|
160
|
-
return (state, None, [])
|
|
161
|
-
else:
|
|
162
|
-
return (state, query_status['QueryExecution']['Status'].get('StateChangeReason'), [])
|
|
{kaqing-2.0.104.dist-info → kaqing-2.0.105.dist-info}/WHEEL
File without changes
|
|
{kaqing-2.0.104.dist-info → kaqing-2.0.105.dist-info}/entry_points.txt
File without changes
|
|
{kaqing-2.0.104.dist-info → kaqing-2.0.105.dist-info}/top_level.txt
File without changes
|