kaqing 2.0.104__py3-none-any.whl → 2.0.105__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kaqing might be problematic. Click here for more details.

@@ -7,7 +7,7 @@ from adam.config import Config
7
7
  from adam.repl_state import ReplState
8
8
  from adam.sql.sql_completer import SqlCompleter
9
9
  from adam.utils import log2
10
- from adam.utils_athena import audit_column_names, audit_table_names, run_audit_query
10
+ from adam.utils_audits import Audits
11
11
 
12
12
  class Audit(Command):
13
13
  COMMAND = 'audit'
@@ -42,7 +42,7 @@ class Audit(Command):
42
42
  else:
43
43
  log2(sql)
44
44
 
45
- run_audit_query(sql)
45
+ Audits.run_audit_query(sql)
46
46
 
47
47
  return state
48
48
 
@@ -52,13 +52,13 @@ class Audit(Command):
52
52
  Config().wait_log(f'Inspecting audit database schema...')
53
53
  self.schema_read = True
54
54
  # warm up the caches first time when l: drive is accessed
55
- audit_column_names()
56
- audit_column_names(partition_cols_only=True)
55
+ Audits.audit_column_names()
56
+ Audits.audit_column_names(partition_cols_only=True)
57
57
 
58
58
  return super().completion(state) | SqlCompleter(
59
- lambda: audit_table_names(),
60
- columns=lambda table: audit_column_names(),
61
- partition_columns=lambda table: audit_column_names(partition_cols_only=True),
59
+ lambda: Audits.audit_table_names(),
60
+ columns=lambda table: Audits.audit_column_names(),
61
+ partition_columns=lambda table: Audits.audit_column_names(partition_cols_only=True),
62
62
  variant='athena'
63
63
  ).completions_for_nesting()
64
64
 
@@ -68,7 +68,7 @@ class Audit(Command):
68
68
  return [AuditRepairTables(), AuditRun()]
69
69
 
70
70
  def help(self, _: ReplState):
71
- return f'[{Audit.COMMAND}] <sql-statements>\t run SQL queries on Authena audit database'
71
+ return f'[{Audit.COMMAND}] [<sql-statements>]\t run SQL queries on Authena audit database'
72
72
 
73
73
  class AuditCommandHelper(click.Command):
74
74
  def get_help(self, ctx: click.Context):
@@ -1,12 +1,11 @@
1
1
  import concurrent
2
2
  import time
3
- import requests
4
3
 
5
4
  from adam.commands.command import Command
6
5
  from adam.config import Config
7
6
  from adam.repl_state import ReplState
8
7
  from adam.utils import log, log2
9
- from adam.utils_athena import AuditMeta, audit_query, get_meta, put_meta, run_audit_query
8
+ from adam.utils_audits import AuditMeta, Audits
10
9
 
11
10
  class AuditRepairTables(Command):
12
11
  COMMAND = 'audit repair'
@@ -34,7 +33,7 @@ class AuditRepairTables(Command):
34
33
  if args:
35
34
  tables = args
36
35
 
37
- meta = get_meta()
36
+ meta = Audits.get_meta()
38
37
  self.repair(tables, meta)
39
38
 
40
39
  return state
@@ -53,8 +52,8 @@ class AuditRepairTables(Command):
53
52
  def auto_repair(self, hours: int):
54
53
  self.auto_repaired = True
55
54
 
56
- meta = get_meta()
57
- if meta.checked_in + hours * 60 * 60 < time.time():
55
+ meta: AuditMeta = Audits.get_meta()
56
+ if meta.partitions_last_checked + hours * 60 * 60 < time.time():
58
57
  tables = Config().get('audit.athena.repair-partition-tables', 'audit').split(',')
59
58
  self.repair(tables, meta, show_sql=True)
60
59
  log2(f'Audit tables have been auto-repaired.')
@@ -65,8 +64,8 @@ class AuditRepairTables(Command):
65
64
  if show_sql:
66
65
  log(f'MSCK REPAIR TABLE {table}')
67
66
 
68
- executor.submit(run_audit_query, f'MSCK REPAIR TABLE {table}', None,)
69
- executor.submit(put_meta, 'check-in', meta,)
67
+ executor.submit(Audits.run_audit_query, f'MSCK REPAIR TABLE {table}', None,)
68
+ executor.submit(Audits.put_meta, Audits.PARTITIONS_ADDED, meta,)
70
69
 
71
70
  def help(self, _: ReplState):
72
71
  return f"{AuditRepairTables.COMMAND} \t run MSCK REPAIR command for new partition discovery"
@@ -1,8 +1,9 @@
1
+ import concurrent
1
2
  from adam.commands.command import Command
2
3
  from adam.config import Config
3
4
  from adam.repl_state import ReplState
4
5
  from adam.utils import log2
5
- from adam.utils_athena import AuditMeta, find_new_clusters, get_meta, put_meta, run_audit_query
6
+ from adam.utils_audits import AuditMeta, Audits
6
7
 
7
8
  class AuditRun(Command):
8
9
  COMMAND = 'audit run'
@@ -26,14 +27,15 @@ class AuditRun(Command):
26
27
 
27
28
  state, args = self.apply_state(args, state)
28
29
 
29
- meta: AuditMeta = get_meta()
30
- clusters = find_new_clusters(meta.cluster_last_checked)
30
+ meta: AuditMeta = Audits.get_meta()
31
+ clusters = Audits.find_new_clusters(meta.cluster_last_checked)
32
+ Audits.put_meta(Audits.ADD_CLUSTERS, meta, clusters=clusters)
31
33
  if clusters:
32
- put_meta('add-clusters', meta, clusters=clusters)
33
34
  log2(f'Added {len(clusters)} new clusters.')
34
35
  tables = Config().get('audit.athena.repair-cluster-tables', 'cluster').split(',')
35
- for table in tables:
36
- run_audit_query(f'MSCK REPAIR TABLE {table}')
36
+ with concurrent.futures.ThreadPoolExecutor(max_workers=Config().get('audit.workers', 3)) as executor:
37
+ for table in tables:
38
+ Audits.run_audit_query(f'MSCK REPAIR TABLE {table}')
37
39
  else:
38
40
  log2(f'No new clusters were found.')
39
41
 
adam/commands/ls.py CHANGED
@@ -14,7 +14,7 @@ from adam.pod_exec_result import PodExecResult
14
14
  from adam.repl_state import ReplState
15
15
  from adam.utils import lines_to_tabular, log, log2
16
16
  from adam.apps import Apps
17
- from adam.utils_athena import audit_table_names
17
+ from adam.utils_audits import Audits
18
18
 
19
19
  class Ls(Command):
20
20
  COMMAND = 'ls'
@@ -131,7 +131,7 @@ class Ls(Command):
131
131
  log(lines_to_tabular(pg_table_names(pg.namespace, pg.path()), 'NAME', separator=','))
132
132
 
133
133
  def show_audit_log_tables(self):
134
- log(lines_to_tabular(audit_table_names(), 'NAME', separator=','))
134
+ log(lines_to_tabular(Audits.audit_table_names(), 'NAME', separator=','))
135
135
 
136
136
  def completion(self, state: ReplState):
137
137
  if state.pod:
@@ -4,7 +4,7 @@ from adam.commands.postgres.postgres_context import PostgresContext
4
4
  from adam.config import Config
5
5
  from adam.repl_state import ReplState, RequiredState
6
6
  from adam.utils import lines_to_tabular, log, log2
7
- from adam.utils_athena import audit_table_names, run_audit_query
7
+ from adam.utils_audits import Audits
8
8
 
9
9
  class PreviewTable(Command):
10
10
  COMMAND = 'preview'
@@ -43,7 +43,7 @@ class PreviewTable(Command):
43
43
  lines = [db["name"] for db in pg.tables() if db["schema"] == PostgresContext.default_schema()]
44
44
  log(lines_to_tabular(lines, separator=','))
45
45
  elif state.device == ReplState.L:
46
- log(lines_to_tabular(audit_table_names(), separator=','))
46
+ log(lines_to_tabular(Audits.audit_table_names(), separator=','))
47
47
  else:
48
48
  log(lines_to_tabular(cassandra_table_names(state), separator=','))
49
49
 
@@ -66,7 +66,7 @@ class PreviewTable(Command):
66
66
  if state.device == ReplState.P:
67
67
  PostgresContext.apply(state.namespace, state.pg_path).run_sql(f'select * from {table} limit {rows}')
68
68
  elif state.device == ReplState.L:
69
- run_audit_query(f'select * from {table} limit {rows}')
69
+ Audits.run_audit_query(f'select * from {table} limit {rows}')
70
70
  else:
71
71
  run_cql(state, f'select * from {table} limit {rows}', show_out=True, use_single_quotes=True, on_any=True)
72
72
 
adam/repl_commands.py CHANGED
@@ -58,7 +58,7 @@ class ReplCommands:
58
58
  cmds: list[Command] = ReplCommands.navigation() + ReplCommands.cassandra_check() + ReplCommands.cassandra_ops() + \
59
59
  ReplCommands.tools() + ReplCommands.app() + ReplCommands.exit()
60
60
 
61
- intermediate_cmds: list[Command] = [App(), Reaper(), Repair(), Deploy(), Show(), Undeploy()]
61
+ intermediate_cmds: list[Command] = [App(), Audit(), Reaper(), Repair(), Deploy(), Show(), Undeploy()]
62
62
  ic = [c.command() for c in intermediate_cmds]
63
63
  # 1. dedup commands
64
64
  deduped = []
@@ -86,7 +86,7 @@ class ReplCommands:
86
86
 
87
87
  def tools() -> list[Command]:
88
88
  return [Cqlsh(), Postgres(), Bash(), Shell(), CodeStart(), CodeStop(), DeployFrontend(), UndeployFrontend(),
89
- DeployPod(), UndeployPod(), DeployPgAgent(), UndeployPgAgent(), AuditRepairTables(), AuditRun(), Audit()]
89
+ DeployPod(), UndeployPod(), DeployPgAgent(), UndeployPgAgent(), Audit()] + Audit.cmd_list()
90
90
 
91
91
  def app() -> list[Command]:
92
92
  return [ShowAppActions(), ShowAppId(), ShowAppQueues(), AppPing(), App()]
adam/utils_audits.py ADDED
@@ -0,0 +1,167 @@
1
+ from datetime import datetime
2
+ import functools
3
+ import time
4
+ import boto3
5
+ import requests
6
+
7
+ from adam.config import Config
8
+ from adam.utils import lines_to_tabular, log, log2
9
+
10
+ class AuditMeta:
11
+ def __init__(self, partitions_last_checked: float, cluster_last_checked: float):
12
+ self.partitions_last_checked = partitions_last_checked
13
+ self.cluster_last_checked = cluster_last_checked
14
+
15
+ # no state utility class
16
+ class Audits:
17
+ PARTITIONS_ADDED = 'partitions-added'
18
+ ADD_CLUSTERS = 'add-clusters'
19
+
20
+ def get_meta() -> AuditMeta:
21
+ checked_in = 0.0
22
+ cluster_last_checked = 0.0
23
+
24
+ state, _, rs = Audits.audit_query(f'select partitions_last_checked, clusters_last_checked from meta')
25
+ if state == 'SUCCEEDED':
26
+ if len(rs) > 1:
27
+ try:
28
+ row = rs[1]['Data']
29
+ checked_in = float(row[0]['VarCharValue'])
30
+ cluster_last_checked = float(row[1]['VarCharValue'])
31
+ except:
32
+ pass
33
+
34
+ return AuditMeta(checked_in, cluster_last_checked)
35
+
36
+ def find_new_clusters(cluster_last_checked: float) -> list[str]:
37
+ dt_object = datetime.fromtimestamp(cluster_last_checked)
38
+
39
+ y = dt_object.strftime("%Y")
40
+ m = dt_object.strftime("%m")
41
+ d = dt_object.strftime("%d")
42
+ # select distinct c2.name from cluster as c1 right outer join
43
+ # (select distinct c as name from audit where y = '1969' and m = '12' and d >= '31' or y = '1969' and m > '12' or y > '1969') as c2
44
+ # on c1.name = c2.name where c1.name is null
45
+ where = f"y = '{y}' and m = '{m}' and d >= '{d}' or y = '{y}' and m > '{m}' or y > '{y}'"
46
+ query = '\n '.join([
47
+ 'select distinct c2.name from cluster as c1 right outer join',
48
+ f'(select distinct c as name from audit where {where}) as c2',
49
+ 'on c1.name = c2.name where c1.name is null'])
50
+ log2(query)
51
+ state, _, rs = Audits.audit_query(query)
52
+ if state == 'SUCCEEDED':
53
+ if len(rs) > 1:
54
+ try:
55
+ return [r['Data'][0]['VarCharValue'] for r in rs[1:]]
56
+ except:
57
+ pass
58
+
59
+ return []
60
+
61
+ def put_meta(action: str, meta: AuditMeta, clusters: list[str] = None):
62
+ payload = {
63
+ 'action': action,
64
+ 'partitions-last-checked': meta.partitions_last_checked,
65
+ 'clusters-last-checked': meta.cluster_last_checked
66
+ }
67
+ if clusters:
68
+ payload['clusters'] = clusters
69
+
70
+ audit_endpoint = Config().get("audit.endpoint", "https://4psvtaxlcb.execute-api.us-west-2.amazonaws.com/prod/")
71
+ try:
72
+ response = requests.post(audit_endpoint, json=payload, timeout=Config().get("audit.timeout", 10))
73
+ if response.status_code in [200, 201]:
74
+ Config().debug(response.text)
75
+ else:
76
+ log2(f"Error: {response.status_code} {response.text}")
77
+ except requests.exceptions.Timeout as e:
78
+ log2(f"Timeout occurred: {e}")
79
+
80
+ @functools.lru_cache()
81
+ def audit_table_names():
82
+ region_name = Config().get('audit.athena.region', 'us-west-2')
83
+ database_name = Config().get('audit.athena.database', 'audit')
84
+ catalog_name = Config().get('audit.athena.catalog', 'AwsDataCatalog')
85
+
86
+ athena_client = boto3.client('athena', region_name=region_name)
87
+ paginator = athena_client.get_paginator('list_table_metadata')
88
+
89
+ table_names = []
90
+ for page in paginator.paginate(CatalogName=catalog_name, DatabaseName=database_name):
91
+ for table_metadata in page.get('TableMetadataList', []):
92
+ table_names.append(table_metadata['Name'])
93
+
94
+ return table_names
95
+
96
+ @functools.lru_cache()
97
+ def audit_column_names(tables: list[str] = [], database: str = None, partition_cols_only = False):
98
+ if not database:
99
+ database = Config().get('audit.athena.database', 'audit')
100
+
101
+ if not tables:
102
+ tables = Config().get('audit.athena.tables', 'audit').split(',')
103
+
104
+ table_names = "'" + "','".join([table.strip() for table in tables]) + "'"
105
+
106
+ query = f"select column_name from information_schema.columns where table_name in ({table_names}) and table_schema = '{database}'"
107
+ if partition_cols_only:
108
+ query = f"{query} and extra_info = 'partition key'"
109
+
110
+ _, _, rs = Audits.audit_query(query)
111
+ if rs:
112
+ return [row['Data'][0].get('VarCharValue') for row in rs[1:]]
113
+
114
+ return []
115
+
116
+ def run_audit_query(sql: str, database: str = None):
117
+ state, reason, rs = Audits.audit_query(sql, database)
118
+
119
+ if state == 'SUCCEEDED':
120
+ if rs:
121
+ column_info = rs[0]['Data']
122
+ columns = [col.get('VarCharValue') for col in column_info]
123
+ lines = []
124
+ for row in rs[1:]:
125
+ row_data = [col.get('VarCharValue') if col else '' for col in row['Data']]
126
+ lines.append('\t'.join(row_data))
127
+
128
+ log(lines_to_tabular(lines, header='\t'.join(columns), separator='\t'))
129
+ else:
130
+ log2(f"Query failed or was cancelled. State: {state}")
131
+ log2(f"Reason: {reason}")
132
+
133
+ def audit_query(sql: str, database: str = None) -> tuple[str, str, list]:
134
+ athena_client = boto3.client('athena')
135
+
136
+ if not database:
137
+ database = Config().get('audit.athena.database', 'audit')
138
+
139
+ s3_output_location = Config().get('audit.athena.output', 's3://s3.ops--audit/ddl/results')
140
+
141
+ response = athena_client.start_query_execution(
142
+ QueryString=sql,
143
+ QueryExecutionContext={
144
+ 'Database': database
145
+ },
146
+ ResultConfiguration={
147
+ 'OutputLocation': s3_output_location
148
+ }
149
+ )
150
+
151
+ query_execution_id = response['QueryExecutionId']
152
+
153
+ while True:
154
+ query_status = athena_client.get_query_execution(QueryExecutionId=query_execution_id)
155
+ state = query_status['QueryExecution']['Status']['State']
156
+ if state in ['SUCCEEDED', 'FAILED', 'CANCELLED']:
157
+ break
158
+ time.sleep(1)
159
+
160
+ if state == 'SUCCEEDED':
161
+ results_response = athena_client.get_query_results(QueryExecutionId=query_execution_id)
162
+ if results_response['ResultSet']['Rows']:
163
+ return (state, None, results_response['ResultSet']['Rows'])
164
+
165
+ return (state, None, [])
166
+ else:
167
+ return (state, query_status['QueryExecution']['Status'].get('StateChangeReason'), [])
adam/version.py CHANGED
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env python
2
2
  # -*- coding: utf-8 -*-
3
3
 
4
- __version__ = "2.0.104" #: the working version
4
+ __version__ = "2.0.105" #: the working version
5
5
  __release__ = "1.0.0" #: the release version
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: kaqing
3
- Version: 2.0.104
3
+ Version: 2.0.105
4
4
  Summary: UNKNOWN
5
5
  Home-page: UNKNOWN
6
6
  License: UNKNOWN
@@ -10,13 +10,13 @@ adam/embedded_params.py,sha256=YeyY3QTLBtjiMsBt16A7z8lvJ_hlVtAPJR6RbjStIwI,5060
10
10
  adam/log.py,sha256=gg5DK52wLPc9cjykeh0WFHyAk1qI3HEpGaAK8W2dzXY,1146
11
11
  adam/pod_exec_result.py,sha256=WBXJSvxzXp9TfsfXeHtIvgz8GvfMAAcH5M03GISLqzw,1046
12
12
  adam/repl.py,sha256=ZfYEZ6s1Hhw-aIFZk358bb7KCAJ-aTDxSPJvfd2ciHQ,10904
13
- adam/repl_commands.py,sha256=eaxwvI1z2BnuRY4HTNqpzr0L7DhCl9j3FzVvVmi5SuI,4651
13
+ adam/repl_commands.py,sha256=iKcw208Iqs4jxJYX2pCT4woGijpcg0IM58tVlhZQNo8,4646
14
14
  adam/repl_session.py,sha256=uIogcvWBh7wd8QQ-p_JgLsyJ8YJgINw5vOd6JIsd7Vo,472
15
15
  adam/repl_state.py,sha256=GuDq3C42hMX3pmjLlMbARJ8nGiBw8-gNT-xhkKQu-a4,8797
16
16
  adam/utils.py,sha256=sbsNZP3qGJtb6fXCa4dDXHry5ay9ev583cCZIQzy07s,7382
17
- adam/utils_athena.py,sha256=hNvwd3F1qqs-3rGoK42aq0b5XdrHuRXGQtBRU4SBIqY,5840
17
+ adam/utils_audits.py,sha256=kPw6B0lw2q4PmYdk_k16MX_V-ONnrhJX3T-cMTFYZdM,6376
18
18
  adam/utils_net.py,sha256=65fhBnWMCkhGtyHqz95qcHaCo35q-WX1RBkkXG8dKpI,416
19
- adam/version.py,sha256=LKihFK6mt-4FST7GwFS-_WfQG7UfoEZB6WZ6szHFz5Q,140
19
+ adam/version.py,sha256=W2f85EjavUMXa9WADSC-bmOjXgpifTdBQ8mFdwXyUIA,140
20
20
  adam/checks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
21
21
  adam/checks/check.py,sha256=Qopr3huYcMu2bzQgb99dEUYjFzkjKHRI76S6KA9b9Rk,702
22
22
  adam/checks/check_context.py,sha256=FEHkQ32jY1EDopQ2uYWqy9v7aEEX1orLpJWhopwAlh4,402
@@ -66,12 +66,12 @@ adam/commands/help.py,sha256=4IzR4p8UiXr00o1TaymHWm8957EWbWRyuvhrJzZzvc0,1734
66
66
  adam/commands/issues.py,sha256=VS-PC7e-2lywsa-lbmoUX8IY77OPGzFudwbw1g8XmQc,2599
67
67
  adam/commands/login.py,sha256=bj95WWIF7mJDJhnyS9T8xvaZUGL37dj7GlH8TgmODbk,1877
68
68
  adam/commands/logs.py,sha256=GBVztFlCQfd4jfMtqydPjWS9xsB5mV4Aj4ohSQFm6i0,1165
69
- adam/commands/ls.py,sha256=cNs_pl9UZpG43NrOVc3EBAbzgHye1I0E-vN72tIG6ok,5809
69
+ adam/commands/ls.py,sha256=49bS2V6jL_HAu6Y9kJzf8e3v4krxn_wNXUf47_R3Tkw,5805
70
70
  adam/commands/nodetool.py,sha256=k4Gr63slw0BGinbbj3nHue2GHf55cjAjTdt636UKjb8,2363
71
71
  adam/commands/nodetool_commands.py,sha256=5IgWC3rmeDD1cgwqQjiiWzi-wJpJ3n_8pAzz_9phXuk,2635
72
72
  adam/commands/param_get.py,sha256=kPAAppK2T0tEFRnSIVFLDPIIGHhgLA7drJhn8TRyvvE,1305
73
73
  adam/commands/param_set.py,sha256=QDIuqfU80aWCB16OK49yf7XRaRTWwiLkwMsJuVikq9I,1271
74
- adam/commands/preview_table.py,sha256=scGOpKkSc95IHGWfQOrM0cctSgeDmjr2E6BoPp5xk1U,2857
74
+ adam/commands/preview_table.py,sha256=mz9Cstk_UfNNi3rBye4kZ4M0mtjeyfRCUtqP5zifvDc,2843
75
75
  adam/commands/pwd.py,sha256=AvM1gMgxBfIupevtYkGmNWGyEcsIx7-S8L9_v_DPEgk,2469
76
76
  adam/commands/report.py,sha256=Ky45LIzSlB_X4V12JZWjU3SA2u4_FKRencRTq7psOWU,1944
77
77
  adam/commands/restart.py,sha256=SAxWHvglTckQJ0tJe5t-HWsVerbreNMM-7Nb9PAqno4,2044
@@ -79,9 +79,9 @@ adam/commands/rollout.py,sha256=Db9P4Owd3aPcRLIGhwyEElBNm_2Ke54KbiXyVKmztcE,2959
79
79
  adam/commands/shell.py,sha256=wY_PIx7Lt6vuxhFArlfxdEnBbrouCJ3yNHhFn17DEqw,848
80
80
  adam/commands/watch.py,sha256=fU2LGll-Igl08HpUQALOnh8l3s3AMGFX26NCLhqbfcw,2438
81
81
  adam/commands/audit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
82
- adam/commands/audit/audit.py,sha256=MBCvQn6ZwytZPC47W2aV6iVUw1a9_JHMaogsejf5mgw,2588
83
- adam/commands/audit/audit_repair_tables.py,sha256=2lyAbnni1EY01k9fGNckBvhALFLLNndnsRS_wvIt2wQ,2542
84
- adam/commands/audit/audit_run.py,sha256=2Z9VqsAejToWlSYd0JEy1FuT7_ywqTj2D7gZgBkUCKY,1578
82
+ adam/commands/audit/audit.py,sha256=DcyolsPy9Qzan5x3lwomkmkwpAeAa9PHj8Y1Q2bwI70,2584
83
+ adam/commands/audit/audit_repair_tables.py,sha256=zNyhOK_-dbeK-7rH-b_HNeH5Qwrk_ccqakWKujVP7Lk,2549
84
+ adam/commands/audit/audit_run.py,sha256=Bfzv73aaYvHtRhr_mtzeH1muJtexhPfoUPENAiGKCgI,1699
85
85
  adam/commands/cql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
86
86
  adam/commands/cql/cql_completions.py,sha256=29TJmNHg2_EwGyBrgh04dtZl0BDxGEnLXf_hbPQF9no,660
87
87
  adam/commands/cql/cql_utils.py,sha256=EWswF4JEj4EI_lIa1tqhNV-0sasf81SK64aNf8JPqZo,4103
@@ -176,8 +176,8 @@ adam/utils_k8s/service_accounts.py,sha256=v2oQSqCrNvt2uRnKlNwR3fjtpUG7oF5nqgzEB7
176
176
  adam/utils_k8s/services.py,sha256=EOJJGACVbbRvu5T3rMKqIJqgYic1_MSJ17EA0TJ6UOk,3156
177
177
  adam/utils_k8s/statefulsets.py,sha256=0J_cYRqH96PCcq3tdsRrs4Q4ewv5dT_FMBR0HGAJ3d8,4710
178
178
  adam/utils_k8s/volumes.py,sha256=RIBmlOSWM3V3QVXLCFT0owVOyh4rGG1ETp521a-6ndo,1137
179
- kaqing-2.0.104.dist-info/METADATA,sha256=8r0K3oowsddpx2JIoV0koNZpv9hiASyAa3XvYNFvseE,133
180
- kaqing-2.0.104.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
181
- kaqing-2.0.104.dist-info/entry_points.txt,sha256=SkzhuQJUWsXOzHeZ5TgQ2c3_g53UGK23zzJU_JTZOZI,39
182
- kaqing-2.0.104.dist-info/top_level.txt,sha256=8_2PZkwBb-xDcnc8a2rAbQeJhXKXskc7zTP7pSPa1fw,5
183
- kaqing-2.0.104.dist-info/RECORD,,
179
+ kaqing-2.0.105.dist-info/METADATA,sha256=-XxUVOCHMejB3MGXatLONCSDP3GKrOQE2MBoEXNp_U4,133
180
+ kaqing-2.0.105.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
181
+ kaqing-2.0.105.dist-info/entry_points.txt,sha256=SkzhuQJUWsXOzHeZ5TgQ2c3_g53UGK23zzJU_JTZOZI,39
182
+ kaqing-2.0.105.dist-info/top_level.txt,sha256=8_2PZkwBb-xDcnc8a2rAbQeJhXKXskc7zTP7pSPa1fw,5
183
+ kaqing-2.0.105.dist-info/RECORD,,
adam/utils_athena.py DELETED
@@ -1,162 +0,0 @@
1
- from datetime import datetime
2
- import functools
3
- import time
4
- import boto3
5
- import requests
6
-
7
- from adam.config import Config
8
- from adam.utils import lines_to_tabular, log, log2
9
-
10
- class AuditMeta:
11
- def __init__(self, checked_in: float, cluster_last_checked: float):
12
- self.checked_in = checked_in
13
- self.cluster_last_checked = cluster_last_checked
14
-
15
- def get_meta() -> AuditMeta:
16
- checked_in = 0.0
17
- cluster_last_checked = 0.0
18
-
19
- state, _, rs = audit_query(f'select partitions_last_checked, clusters_last_checked from meta')
20
- if state == 'SUCCEEDED':
21
- if len(rs) > 1:
22
- try:
23
- row = rs[1]['Data']
24
- checked_in = float(row[0]['VarCharValue'])
25
- cluster_last_checked = float(row[1]['VarCharValue'])
26
- except:
27
- pass
28
-
29
- return AuditMeta(checked_in, cluster_last_checked)
30
-
31
- def find_new_clusters(cluster_last_checked: float) -> list[str]:
32
- dt_object = datetime.fromtimestamp(cluster_last_checked)
33
-
34
- y = dt_object.strftime("%Y")
35
- m = dt_object.strftime("%m")
36
- d = dt_object.strftime("%d")
37
- # select distinct c2.name from cluster as c1 right outer join
38
- # (select distinct c as name from audit where y = '1969' and m = '12' and d >= '31' or y = '1969' and m > '12' or y > '1969') as c2
39
- # on c1.name = c2.name where c1.name is null
40
- where = f"y = '{y}' and m = '{m}' and d >= '{d}' or y = '{y}' and m > '{m}' or y > '{y}'"
41
- query = '\n '.join([
42
- 'select distinct c2.name from cluster as c1 right outer join',
43
- f'(select distinct c as name from audit where {where}) as c2',
44
- 'on c1.name = c2.name where c1.name is null'])
45
- log2(query)
46
- state, _, rs = audit_query(query)
47
- if state == 'SUCCEEDED':
48
- if len(rs) > 1:
49
- try:
50
- return [r['Data'][0]['VarCharValue'] for r in rs[1:]]
51
- except:
52
- pass
53
-
54
- return []
55
-
56
- def put_meta(action: str, meta: AuditMeta, clusters: list[str] = None):
57
- payload = {
58
- 'action': action,
59
- 'partitions-last-checked': meta.checked_in,
60
- 'clusters-last-checked': meta.cluster_last_checked
61
- }
62
- if clusters:
63
- payload['clusters'] = clusters
64
-
65
- audit_endpoint = Config().get("audit.endpoint", "https://4psvtaxlcb.execute-api.us-west-2.amazonaws.com/prod/")
66
- try:
67
- response = requests.post(audit_endpoint, json=payload, timeout=Config().get("audit.timeout", 10))
68
- if response.status_code in [200, 201]:
69
- Config().debug(response.text)
70
- else:
71
- log2(f"Error: {response.status_code} {response.text}")
72
- except requests.exceptions.Timeout as e:
73
- log2(f"Timeout occurred: {e}")
74
-
75
- @functools.lru_cache()
76
- def audit_table_names():
77
- region_name = Config().get('audit.athena.region', 'us-west-2')
78
- database_name = Config().get('audit.athena.database', 'audit')
79
- catalog_name = Config().get('audit.athena.catalog', 'AwsDataCatalog')
80
-
81
- athena_client = boto3.client('athena', region_name=region_name)
82
- paginator = athena_client.get_paginator('list_table_metadata')
83
-
84
- table_names = []
85
- for page in paginator.paginate(CatalogName=catalog_name, DatabaseName=database_name):
86
- for table_metadata in page.get('TableMetadataList', []):
87
- table_names.append(table_metadata['Name'])
88
-
89
- return table_names
90
-
91
- @functools.lru_cache()
92
- def audit_column_names(tables: list[str] = [], database: str = None, partition_cols_only = False):
93
- if not database:
94
- database = Config().get('audit.athena.database', 'audit')
95
-
96
- if not tables:
97
- tables = Config().get('audit.athena.tables', 'audit').split(',')
98
-
99
- table_names = "'" + "','".join([table.strip() for table in tables]) + "'"
100
-
101
- query = f"select column_name from information_schema.columns where table_name in ({table_names}) and table_schema = '{database}'"
102
- if partition_cols_only:
103
- query = f"{query} and extra_info = 'partition key'"
104
-
105
- _, _, rs = audit_query(query)
106
- if rs:
107
- return [row['Data'][0].get('VarCharValue') for row in rs[1:]]
108
-
109
- return []
110
-
111
- def run_audit_query(sql: str, database: str = None):
112
- state, reason, rs = audit_query(sql, database)
113
-
114
- if state == 'SUCCEEDED':
115
- if rs:
116
- column_info = rs[0]['Data']
117
- columns = [col.get('VarCharValue') for col in column_info]
118
- lines = []
119
- for row in rs[1:]:
120
- row_data = [col.get('VarCharValue') if col else '' for col in row['Data']]
121
- lines.append('\t'.join(row_data))
122
-
123
- log(lines_to_tabular(lines, header='\t'.join(columns), separator='\t'))
124
- else:
125
- log2(f"Query failed or was cancelled. State: {state}")
126
- log2(f"Reason: {reason}")
127
-
128
- def audit_query(sql: str, database: str = None) -> tuple[str, str, list]:
129
- athena_client = boto3.client('athena')
130
-
131
- if not database:
132
- database = Config().get('audit.athena.database', 'audit')
133
-
134
- s3_output_location = Config().get('audit.athena.output', 's3://s3.ops--audit/ddl/results')
135
-
136
- response = athena_client.start_query_execution(
137
- QueryString=sql,
138
- QueryExecutionContext={
139
- 'Database': database
140
- },
141
- ResultConfiguration={
142
- 'OutputLocation': s3_output_location
143
- }
144
- )
145
-
146
- query_execution_id = response['QueryExecutionId']
147
-
148
- while True:
149
- query_status = athena_client.get_query_execution(QueryExecutionId=query_execution_id)
150
- state = query_status['QueryExecution']['Status']['State']
151
- if state in ['SUCCEEDED', 'FAILED', 'CANCELLED']:
152
- break
153
- time.sleep(1)
154
-
155
- if state == 'SUCCEEDED':
156
- results_response = athena_client.get_query_results(QueryExecutionId=query_execution_id)
157
- if results_response['ResultSet']['Rows']:
158
- return (state, None, results_response['ResultSet']['Rows'])
159
-
160
- return (state, None, [])
161
- else:
162
- return (state, query_status['QueryExecution']['Status'].get('StateChangeReason'), [])