kaqing 2.0.172__py3-none-any.whl → 2.0.186__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kaqing might be problematic.
- adam/app_session.py +2 -2
- adam/apps.py +18 -4
- adam/batch.py +1 -1
- adam/checks/check_utils.py +3 -1
- adam/commands/__init__.py +8 -2
- adam/commands/alter_tables.py +24 -35
- adam/commands/app/__init__.py +0 -0
- adam/commands/app/app.py +38 -0
- adam/commands/app/app_ping.py +38 -0
- adam/commands/app/show_app_actions.py +49 -0
- adam/commands/app/show_app_id.py +44 -0
- adam/commands/app/show_app_queues.py +38 -0
- adam/commands/app/utils_app.py +106 -0
- adam/commands/audit/audit.py +9 -27
- adam/commands/audit/audit_repair_tables.py +5 -7
- adam/commands/audit/audit_run.py +1 -1
- adam/commands/audit/completions_l.py +15 -0
- adam/commands/audit/show_last10.py +2 -14
- adam/commands/audit/show_slow10.py +2 -13
- adam/commands/audit/show_top10.py +2 -11
- adam/commands/audit/utils_show_top10.py +14 -1
- adam/commands/bash/bash.py +1 -1
- adam/commands/cat.py +5 -19
- adam/commands/cd.py +6 -8
- adam/commands/check.py +10 -18
- adam/commands/cli_commands.py +6 -1
- adam/commands/{cp.py → clipboard_copy.py} +34 -36
- adam/commands/code.py +2 -2
- adam/commands/command.py +94 -10
- adam/commands/commands_utils.py +19 -12
- adam/commands/cql/completions_c.py +28 -0
- adam/commands/cql/cqlsh.py +3 -7
- adam/commands/cql/utils_cql.py +22 -60
- adam/commands/deploy/deploy_pg_agent.py +2 -2
- adam/commands/deploy/undeploy_pg_agent.py +2 -2
- adam/commands/devices/device.py +39 -8
- adam/commands/devices/device_app.py +19 -29
- adam/commands/devices/device_auit_log.py +3 -3
- adam/commands/devices/device_cass.py +17 -23
- adam/commands/devices/device_export.py +12 -11
- adam/commands/devices/device_postgres.py +79 -63
- adam/commands/download_file.py +47 -0
- adam/commands/export/clean_up_all_export_sessions.py +3 -3
- adam/commands/export/clean_up_export_sessions.py +7 -19
- adam/commands/export/completions_x.py +11 -0
- adam/commands/export/download_export_session.py +40 -0
- adam/commands/export/drop_export_database.py +6 -22
- adam/commands/export/drop_export_databases.py +3 -9
- adam/commands/export/export.py +1 -17
- adam/commands/export/export_databases.py +93 -21
- adam/commands/export/export_select.py +8 -68
- adam/commands/export/export_sessions.py +209 -0
- adam/commands/export/export_use.py +13 -16
- adam/commands/export/export_x_select.py +48 -0
- adam/commands/export/exporter.py +108 -129
- adam/commands/export/import_files.py +44 -0
- adam/commands/export/import_session.py +10 -6
- adam/commands/export/importer.py +19 -5
- adam/commands/export/importer_athena.py +112 -41
- adam/commands/export/importer_sqlite.py +47 -19
- adam/commands/export/show_column_counts.py +11 -20
- adam/commands/export/show_export_databases.py +5 -2
- adam/commands/export/show_export_session.py +6 -15
- adam/commands/export/show_export_sessions.py +4 -11
- adam/commands/export/utils_export.py +46 -16
- adam/commands/find_files.py +51 -0
- adam/commands/find_processes.py +76 -0
- adam/commands/head.py +36 -0
- adam/commands/help.py +2 -2
- adam/commands/intermediate_command.py +6 -3
- adam/commands/ls.py +1 -1
- adam/commands/medusa/medusa_backup.py +13 -16
- adam/commands/medusa/medusa_restore.py +39 -32
- adam/commands/medusa/medusa_show_backupjobs.py +6 -4
- adam/commands/medusa/medusa_show_restorejobs.py +5 -3
- adam/commands/medusa/utils_medusa.py +15 -0
- adam/commands/nodetool.py +3 -8
- adam/commands/param_get.py +10 -12
- adam/commands/param_set.py +7 -10
- adam/commands/postgres/completions_p.py +22 -0
- adam/commands/postgres/postgres.py +25 -40
- adam/commands/postgres/postgres_databases.py +270 -0
- adam/commands/postgres/utils_postgres.py +33 -20
- adam/commands/preview_table.py +4 -2
- adam/commands/pwd.py +3 -3
- adam/commands/reaper/reaper_forward.py +2 -2
- adam/commands/reaper/reaper_run_abort.py +4 -10
- adam/commands/reaper/reaper_runs.py +3 -3
- adam/commands/reaper/reaper_schedule_activate.py +12 -12
- adam/commands/reaper/reaper_schedule_start.py +7 -12
- adam/commands/reaper/reaper_schedule_stop.py +7 -12
- adam/commands/reaper/utils_reaper.py +13 -6
- adam/commands/repair/repair_scan.py +0 -2
- adam/commands/repair/repair_stop.py +0 -1
- adam/commands/shell.py +7 -5
- adam/commands/show/show.py +1 -1
- adam/commands/show/show_adam.py +3 -3
- adam/commands/show/show_cassandra_repairs.py +5 -3
- adam/commands/show/show_cassandra_status.py +27 -20
- adam/commands/show/{show_commands.py → show_cli_commands.py} +2 -2
- adam/commands/show/show_login.py +2 -2
- adam/commands/show/show_params.py +2 -5
- adam/commands/show/show_processes.py +15 -14
- adam/commands/show/show_storage.py +9 -8
- adam/config.py +1 -0
- adam/embedded_params.py +1 -1
- adam/repl.py +20 -11
- adam/repl_commands.py +16 -9
- adam/repl_session.py +8 -1
- adam/repl_state.py +33 -10
- adam/sql/lark_completer.py +280 -0
- adam/sql/lark_parser.py +604 -0
- adam/sql/sql_state_machine.py +8 -2
- adam/utils.py +116 -29
- adam/utils_athena.py +7 -8
- adam/utils_issues.py +2 -2
- adam/utils_k8s/app_clusters.py +2 -2
- adam/utils_k8s/app_pods.py +5 -2
- adam/utils_k8s/cassandra_clusters.py +11 -3
- adam/utils_k8s/cassandra_nodes.py +2 -2
- adam/utils_k8s/k8s.py +14 -5
- adam/utils_k8s/kube_context.py +2 -2
- adam/utils_k8s/pods.py +23 -5
- adam/utils_k8s/statefulsets.py +5 -2
- adam/utils_local.py +4 -0
- adam/utils_repl/appendable_completer.py +6 -0
- adam/utils_repl/repl_completer.py +128 -2
- adam/utils_sqlite.py +14 -14
- adam/version.py +1 -1
- {kaqing-2.0.172.dist-info → kaqing-2.0.186.dist-info}/METADATA +1 -1
- kaqing-2.0.186.dist-info/RECORD +250 -0
- adam/commands/cql/cql_completions.py +0 -33
- adam/commands/export/export_handlers.py +0 -71
- adam/commands/export/export_select_x.py +0 -54
- adam/commands/postgres/postgres_context.py +0 -272
- adam/commands/postgres/psql_completions.py +0 -10
- kaqing-2.0.172.dist-info/RECORD +0 -230
- {kaqing-2.0.172.dist-info → kaqing-2.0.186.dist-info}/WHEEL +0 -0
- {kaqing-2.0.172.dist-info → kaqing-2.0.186.dist-info}/entry_points.txt +0 -0
- {kaqing-2.0.172.dist-info → kaqing-2.0.186.dist-info}/top_level.txt +0 -0
adam/commands/export/exporter.py
CHANGED
@@ -1,21 +1,20 @@
 from datetime import datetime
-import functools
-import re
 import time
 
+from adam.commands.command import InvalidArgumentsException
 from adam.commands.cql.utils_cql import cassandra_table_names, run_cql, table_spec
-from adam.commands.export.export_databases import
+from adam.commands.export.export_databases import export_db
+from adam.commands.export.export_sessions import ExportSessions
 from adam.commands.export.importer import Importer
 from adam.commands.export.importer_athena import AthenaImporter
 from adam.commands.export.importer_sqlite import SqliteImporter
-from adam.commands.export.utils_export import ExportSpec, ExportTableStatus, ExportTableSpec, ImportSpec, csv_dir, find_files
+from adam.commands.export.utils_export import ExportSpec, ExportTableStatus, ExportTableSpec, ImportSpec, csv_dir, find_files, state_with_pod
 from adam.config import Config
 from adam.pod_exec_result import PodExecResult
 from adam.repl_state import ReplState
-from adam.utils import debug, parallelize, log2, ing, log_exc
+from adam.utils import debug, log, parallelize, log2, ing, log_exc
 from adam.utils_k8s.cassandra_nodes import CassandraNodes
 from adam.utils_k8s.pods import log_prefix
-from adam.utils_k8s.statefulsets import StatefulSets
 
 class Exporter:
     def export_tables(args: list[str], state: ReplState, export_only: bool = False, max_workers = 0) -> tuple[list[str], ExportSpec]:
@@ -69,10 +68,10 @@ class Exporter:
 
         return spec
 
-    def import_session(
+    def import_session(spec_str: str, state: ReplState, max_workers = 0) -> tuple[list[str], ExportSpec]:
         import_spec: ImportSpec = None
         with log_exc(True):
-            import_spec = Exporter.import_spec(
+            import_spec = Exporter.import_spec(spec_str, state)
             tables, status_in_whole = ExportTableStatus.from_session(state.sts, state.pod, state.namespace, import_spec.session)
             if status_in_whole == 'done':
                 log2(f'The session has been completely done - no more csv files are found.')
@@ -84,8 +83,30 @@
 
         return [], None
 
-    def
-        spec: ImportSpec =
+    def import_local_csv_files(spec_str: str, state: ReplState, max_workers = 0) -> tuple[list[str], ExportSpec]:
+        spec: ImportSpec = None
+        with log_exc(True):
+            spec = Exporter.import_spec(spec_str, state, files=True)
+            if not spec.table_name:
+                log2(f"Use 'as <database-name>.<table-name>'.")
+                raise InvalidArgumentsException()
+
+            d_t = spec.table_name.split('.')
+            if len(d_t) != 2:
+                log2(f'Need <database-name>.<table-name> format for target table.')
+                raise InvalidArgumentsException()
+
+            database = d_t[0]
+            table = d_t[1]
+            im = AthenaImporter() if spec.importer == 'athena' else SqliteImporter()
+
+            with parallelize(spec.files, max_workers, msg='Importing|Imported {size} csv files') as exec:
+                return exec.map(lambda f: im.import_from_local_csv(state, database, table, f, len(spec.files) > 1, True)), spec
+
+        return [], None
+
+    def import_spec(spec_str: str, state: ReplState, files = False):
+        spec: ImportSpec = ImportSpec.parse_specs(spec_str, files=files)
 
         session = state.export_session
         if session:
@@ -104,15 +125,19 @@
         if not spec.importer:
             spec.importer = Importer.importer_from_session(spec.session)
 
-        if spec.importer == 'csv':
+        if not spec.importer or spec.importer == 'csv':
            spec.importer = Config().get('export.default-importer', 'sqlite')
 
         if spec.importer == 'athena' and not AthenaImporter.ping():
             raise Exception('Credentials for Athena is not present.')
 
         prefix = Importer.prefix_from_importer(spec.importer)
-
-
+        if spec.session:
+            spec.session = f'{prefix}{spec.session[1:]}'
+        else:
+            spec.session = f'{prefix}{datetime.now().strftime("%Y%m%d%H%M%S")[3:]}'
+
+        state.export_session = spec.session
 
         return spec
 
@@ -196,7 +221,7 @@ class Exporter:
         queries.append(f"COPY {spec.keyspace}.{table}({columns}) TO '{csv_file}' WITH HEADER = TRUE")
         r: PodExecResult = ing(
             f'[{session}] Dumping table {spec.keyspace}.{table}{f" with consistency {consistency}" if consistency else ""}',
-            lambda: run_cql(state, ';'.join(queries), show_out=Config().is_debug(),
+            lambda: run_cql(state, ';'.join(queries), show_out=Config().is_debug(), backgrounded=True, log_file=log_file),
             suppress_log=suppress_ing_log)
 
         return log_file
@@ -211,109 +236,7 @@
 
     def import_from_csv(spec: ExportTableSpec, state: ReplState, session: str, importer: str, table: str, target_table: str, columns: str, multi_tables = True, create_db = False):
         im = AthenaImporter() if importer == 'athena' else SqliteImporter()
-        return im.import_from_csv(state
-
-    def clear_export_session_cache():
-        Exporter.find_export_sessions.cache_clear()
-        Exporter.export_session_names.cache_clear()
-
-    @functools.lru_cache()
-    def export_session_names(sts: str, pod: str, namespace: str, importer: str = None, export_state = None):
-        if not sts or not namespace:
-            return []
-
-        if not pod:
-            pod = StatefulSets.pod_names(sts, namespace)[0]
-
-        if not pod:
-            return []
-
-        return [session for session, state in Exporter.find_export_sessions(pod, namespace, importer).items() if not export_state or state == export_state]
-
-    @functools.lru_cache()
-    def find_export_sessions(pod: str, namespace: str, importer: str = None, limit = 100):
-        sessions: dict[str, str] = {}
-
-        prefix = Importer.prefix_from_importer(importer)
-
-        log_files: list[str] = find_files(pod, namespace, f'{log_prefix()}-{prefix}*_*.log*')
-
-        if not log_files:
-            return {}
-
-        for log_file in log_files[:limit]:
-            m = re.match(f'{log_prefix()}-(.*?)_.*\.log?(.*)', log_file)
-            if m:
-                s = m.group(1)
-                state = m.group(2)  # '', '.pending_import', '.done'
-                if state:
-                    state = state.strip('.')
-                else:
-                    state = 'in_export'
-
-                if s not in sessions:
-                    sessions[s] = state
-                elif sessions[s] == 'done' and state != 'done':
-                    sessions[s] = state
-
-        return sessions
-
-    def clean_up_all_sessions(sts: str, pod: str, namespace: str):
-        if not sts or not namespace:
-            return False
-
-        if not pod:
-            pod = StatefulSets.pod_names(sts, namespace)[0]
-
-        CassandraNodes.exec(pod, namespace, f'rm -rf {csv_dir()}/*', show_out=Config().is_debug(), shell='bash')
-        CassandraNodes.exec(pod, namespace, f'rm -rf {log_prefix()}-*.log*', show_out=Config().is_debug(), shell='bash')
-
-        return True
-
-    def clean_up_sessions(sts: str, pod: str, namespace: str, sessions: list[str], max_workers = 0):
-        if not sessions:
-            return []
-
-        if not max_workers:
-            max_workers = Config().action_workers('export', 8)
-
-        with parallelize(sessions, max_workers, msg='Cleaning|Cleaned up {size} export sessions') as exec:
-            cnt_tuples = exec.map(lambda session: Exporter.clean_up_session(sts, pod, namespace, session, True))
-            csv_cnt = 0
-            log_cnt = 0
-            for (csv, log) in cnt_tuples:
-                csv_cnt += csv
-                log_cnt += log
-
-            return csv_cnt, log_cnt
-
-    def clean_up_session(sts: str, pod: str, namespace: str, session: str, multi_tables = True):
-        if not sts or not namespace:
-            return 0, 0
-
-        if not pod:
-            pod = StatefulSets.pod_names(sts, namespace)[0]
-
-        if not pod:
-            return 0, 0
-
-        csv_cnt = 0
-        log_cnt = 0
-
-        log_files: list[str] = find_files(pod, namespace, f'{log_prefix()}-{session}_*.log*')
-
-        for log_file in log_files:
-            m = re.match(f'{log_prefix()}-{session}_(.*?)\.(.*?)\.log.*', log_file)
-            if m:
-                table = m.group(2)
-
-                CassandraNodes.exec(pod, namespace, f'rm -rf {csv_dir()}/{session}_{table}', show_out=not multi_tables, shell='bash')
-                csv_cnt += 1
-
-            CassandraNodes.exec(pod, namespace, f'rm -rf {log_file}', show_out=not multi_tables, shell='bash')
-            log_cnt += 1
-
-        return csv_cnt, log_cnt
+        return im.import_from_csv(state, session if session else state.export_session, spec.keyspace, table, target_table, columns, multi_tables, create_db)
 
     def resove_table_n_columns(spec: ExportTableSpec, state: ReplState, include_ks_in_target = False, importer = 'sqlite'):
         table = spec.table
@@ -339,15 +262,71 @@
 
         return table, target_table, columns
 
-
-
-
-
-
-
-
-
-        with
-
-        Exporter.
-
+class ExportService:
+    def __init__(self, handler: 'ExporterHandler'):
+        self.handler = handler
+
+    def export(self, args: list[str], export_only=False):
+        state = self.handler.state
+        export_session = state.export_session
+        spec: ExportSpec = None
+        try:
+            with state_with_pod(state) as state:
+                # --export-only for testing only
+                statuses, spec = Exporter.export_tables(args, state, export_only=export_only)
+                if not statuses:
+                    return state
+
+                ExportSessions.clear_export_session_cache()
+
+                if spec.importer == 'csv' or export_only:
+                    ExportSessions.show_session(state.sts, state.pod, state.namespace, spec.session)
+                else:
+                    log()
+                    with export_db(state) as dbs:
+                        dbs.show_database()
+        finally:
+            # if exporting to csv, do not bind the new session id to repl state
+            if spec and spec.importer == 'csv':
+                state.export_session = export_session
+
+        return state
+
+    def import_session(self, spec_str: str):
+        state = self.handler.state
+
+        tables, _ = Exporter.import_session(spec_str, state)
+        if tables:
+            ExportSessions.clear_export_session_cache()
+
+            log()
+            with export_db(state) as dbs:
+                dbs.show_database()
+
+        return state
+
+    def import_files(self, spec_str: str):
+        state = self.handler.state
+
+        tables, _ = Exporter.import_local_csv_files(spec_str, state)
+        if tables:
+            ExportSessions.clear_export_session_cache()
+
+            log()
+            with export_db(state) as dbs:
+                dbs.show_database()
+
+        return state
+
+class ExporterHandler:
+    def __init__(self, state: ReplState):
+        self.state = state
+
+    def __enter__(self):
+        return ExportService(self)
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        return False
+
+def export(state: ReplState):
+    return ExporterHandler(state)
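The session-discovery and clean-up helpers deleted above evidently move into the new export_sessions.py (ExportSessions), and callers now reach the exporter through a small context-manager facade instead of Exporter's static helpers. A minimal sketch of the new call pattern, assuming a ReplState already wired up by the surrounding REPL code:

    # Sketch only: 'state' is a ReplState provided by the REPL, not built here.
    from adam.commands.export.exporter import export

    def run_export(state, args: list[str]):
        with export(state) as exporter:    # ExporterHandler.__enter__ yields an ExportService
            return exporter.export(args)   # or exporter.import_session(...) / exporter.import_files(...)

The import commands below drive the exporter exactly this way.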
adam/commands/export/import_files.py
ADDED
@@ -0,0 +1,44 @@
+from adam.commands import validate_args
+from adam.commands.command import Command
+from adam.commands.export.export_sessions import ExportSessions
+from adam.commands.export.exporter import export
+from adam.commands.export.utils_export import state_with_pod
+from adam.repl_state import ReplState, RequiredState
+
+class ImportCSVFiles(Command):
+    COMMAND = 'import files'
+
+    # the singleton pattern
+    def __new__(cls, *args, **kwargs):
+        if not hasattr(cls, 'instance'): cls.instance = super(ImportCSVFiles, cls).__new__(cls)
+
+        return cls.instance
+
+    def __init__(self, successor: Command=None):
+        super().__init__(successor)
+
+    def command(self):
+        return ImportCSVFiles.COMMAND
+
+    def required(self):
+        return RequiredState.CLUSTER_OR_POD
+
+    def run(self, cmd: str, state: ReplState):
+        if not(args := self.args(cmd)):
+            return super().run(cmd, state)
+
+        with self.validate(args, state) as (args, state):
+            with validate_args(args, state, name='file') as spec:
+                with state_with_pod(state) as state:
+                    with export(state) as exporter:
+                        return exporter.import_files(spec)
+
+    def completion(self, state: ReplState):
+        # warm up cache
+        # ExportSessions.export_session_names(state.sts, state.pod, state.namespace)
+        # ExportSessions.export_session_names(state.sts, state.pod, state.namespace, export_state='pending_import')
+
+        return {}
+
+    def help(self, _: ReplState):
+        return f'{ImportCSVFiles.COMMAND} <file-names,...>\t import files in session to Athena or SQLite'
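Judging from the help text and the as <database-name>.<table-name> check added in Exporter.import_local_csv_files, a REPL invocation would look something like this (paths and names are illustrative):

    import files /tmp/users1.csv,/tmp/users2.csv as mydb.users

Each listed csv file is handed to Importer.import_from_local_csv in parallel and lands in the database and table named by the as clause.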
adam/commands/export/import_session.py
CHANGED
@@ -1,6 +1,8 @@
+from adam.commands import validate_args
 from adam.commands.command import Command
-from adam.commands.export.
-from adam.commands.export.exporter import
+from adam.commands.export.export_sessions import ExportSessions
+from adam.commands.export.exporter import export
+from adam.commands.export.utils_export import state_with_pod
 from adam.repl_state import ReplState, RequiredState
 
 class ImportSession(Command):
@@ -26,13 +28,15 @@ class ImportSession(Command):
             return super().run(cmd, state)
 
         with self.validate(args, state) as (args, state):
-            with
-
+            with validate_args(args, state, name='export session') as spec:
+                with state_with_pod(state) as state:
+                    with export(state) as exporter:
+                        return exporter.import_session(spec)
 
     def completion(self, state: ReplState):
         # warm up cache
-
-
+        # ExportSessions.export_session_names(state.sts, state.pod, state.namespace)
+        # ExportSessions.export_session_names(state.sts, state.pod, state.namespace, export_state='pending_import')
 
         return {}
 
adam/commands/export/importer.py
CHANGED
@@ -2,6 +2,7 @@ from abc import abstractmethod
 
 from adam.commands.export.utils_export import csv_dir
 from adam.config import Config
+from adam.repl_state import ReplState
 from adam.utils import ing
 from adam.utils_k8s.cassandra_nodes import CassandraNodes
 from adam.utils_k8s.pods import log_prefix
@@ -12,10 +13,20 @@ class Importer:
         pass
 
     @abstractmethod
-    def import_from_csv(self,
+    def import_from_csv(self, state: ReplState, from_session: str, keyspace: str, table: str, target_table: str, columns: str, multi_tables = True, create_db = False):
         pass
 
-
+    @abstractmethod
+    def import_from_local_csv(self, state: ReplState,
+                              keyspace: str, table: str, target_table: str, columns: str,
+                              csv_file: str,
+                              multi_tables = True, create_db = False):
+        pass
+
+    def move_to_done(self, state: ReplState, from_session: str, keyspace: str, target_table: str):
+        pod = state.pod
+        namespace = state.namespace
+        to_session = state.export_session
         log_file = f'{log_prefix()}-{from_session}_{keyspace}.{target_table}.log.pending_import'
 
         to = f'{log_prefix()}-{to_session}_{keyspace}.{target_table}.log.done'
@@ -30,9 +41,12 @@ class Importer:
 
         return session
 
-    def remove_csv(self,
-
-
+    def remove_csv(self, state: ReplState, from_session: str, table: str, target_table: str, multi_tables = True):
+        pod = state.pod
+        namespace = state.namespace
+
+        with ing(f'[{from_session}] Cleaning up temporary files', suppress_log=multi_tables):
+            CassandraNodes.exec(pod, namespace, f'rm -rf {self.csv_file(from_session, table, target_table)}', show_out=Config().is_debug(), shell='bash')
 
     def db(self, session: str, keyspace: str):
         return f'{session}_{keyspace}'
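The Importer surface is now ReplState-first: pod, namespace, and the target session are read off the state inside move_to_done and remove_csv rather than passed positionally. A sketch of what a concrete importer has to supply under the new signatures (the class and its bodies are illustrative stubs, not part of the package):

    from adam.commands.export.importer import Importer
    from adam.repl_state import ReplState

    class NoopImporter(Importer):
        # Illustrative stub; prefix() is assumed abstract here (AthenaImporter returns 'e').
        def prefix(self):
            return 'n'

        def import_from_csv(self, state: ReplState, from_session: str, keyspace: str, table: str,
                            target_table: str, columns: str, multi_tables = True, create_db = False):
            return None, state.export_session

        def import_from_local_csv(self, state: ReplState,
                                  keyspace: str, table: str, target_table: str, columns: str,
                                  csv_file: str, multi_tables = True, create_db = False):
            return csv_file, state.export_session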
adam/commands/export/importer_athena.py
CHANGED
@@ -1,9 +1,11 @@
+import os
 import boto3
 
+from adam.commands.export.export_databases import export_db
 from adam.commands.export.importer import Importer
-from adam.commands.export.utils_export import GeneratorStream
 from adam.config import Config
-from adam.
+from adam.repl_state import ReplState
+from adam.utils import GeneratorStream, bytes_generator_from_file, debug, log2, ing
 from adam.utils_athena import Athena
 from adam.utils_k8s.pods import Pods
 
@@ -17,9 +19,14 @@ class AthenaImporter(Importer):
     def prefix(self):
         return 'e'
 
-    def import_from_csv(self,
+    def import_from_csv(self, state: ReplState, from_session: str,
+                        keyspace: str, table: str, target_table: str, columns: str,
+                        multi_tables = True, create_db = False):
         csv_file = self.csv_file(from_session, table, target_table)
-
+        pod = state.pod
+        namespace = state.namespace
+        to_session = state.export_session
+        database = self.db(to_session, keyspace)
 
         succeeded = False
         try:
@@ -29,49 +36,113 @@
                 bytes = Pods.read_file(pod, 'cassandra', namespace, csv_file)
 
                 s3 = boto3.client('s3')
-                s3.upload_fileobj(GeneratorStream(bytes), bucket, f'export/{
-
-
-
-
-            else:
-                msg = f"[{to_session}] Creating table {target_table}"
-            with ing(msg, suppress_log=multi_tables):
-                query = f'CREATE DATABASE IF NOT EXISTS {db};'
-                debug(query)
-                Athena.query(query, 'default')
-
-                query = f'DROP TABLE IF EXISTS {target_table};'
-                debug(query)
-                Athena.query(query, db)
-
-                athena_columns = ', '.join([f'{c} string' for c in columns.split(',')])
-                query = f'CREATE EXTERNAL TABLE IF NOT EXISTS {target_table}(\n' + \
-                    f' {athena_columns})\n' + \
-                    "ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'\n" + \
-                    'WITH SERDEPROPERTIES (\n' + \
-                    ' "separatorChar" = ",",\n' + \
-                    ' "quoteChar" = "\\"")\n' + \
-                    f"LOCATION 's3://{bucket}/export/{db}/{keyspace}/{target_table}'\n" + \
-                    'TBLPROPERTIES ("skip.header.line.count"="1");'
-                debug(query)
-                try:
-                    Athena.query(query, db)
-                except Exception as e:
-                    log2(f'*** Failed query:\n{query}')
-                    raise e
-
-            to, _ = self.move_to_done(pod, namespace, to_session, from_session, keyspace, target_table)
+                s3.upload_fileobj(GeneratorStream(bytes), bucket, f'export/{database}/{keyspace}/{target_table}/{table}.csv')
+
+            self.create_schema(to_session, bucket, database, keyspace, table, columns, multi_tables, create_db)
+
+            to, _ = self.move_to_done(state, from_session, keyspace, target_table)
 
             succeeded = True
 
             return to, to_session
         finally:
             if succeeded:
-                self.remove_csv(
+                self.remove_csv(state, from_session, table, target_table, multi_tables)
                 Athena.clear_cache()
 
                 if not multi_tables:
-
-
-
+                    with export_db(state) as dbs:
+                        dbs.sql(f'select * from {database}.{target_table} limit 10')
+
+    def import_from_local_csv(self, state: ReplState,
+                              keyspace: str, table: str, csv_file: str, multi_tables = True, create_db = False):
+        to_session = state.export_session
+        database = self.db(to_session, keyspace)
+
+        succeeded = False
+        try:
+            columns = None
+            with open(csv_file, 'r') as f:
+                columns = f.readline()
+
+            bucket = Config().get('export.bucket', 'c3.ops--qing')
+
+            with ing(f'[{to_session}] Uploading to S3', suppress_log=multi_tables):
+                bytes = bytes_generator_from_file(csv_file)
+
+                s3 = boto3.client('s3')
+                s3.upload_fileobj(GeneratorStream(bytes), bucket, f'export/{database}/{keyspace}/{table}/{os.path.basename(csv_file)}')
+
+            self.create_schema(to_session, bucket, database, keyspace, table, columns, multi_tables, create_db)
+            # msg: str = None
+            # if create_db:
+            #     msg = f"[{to_session}] Creating database {database}"
+            # else:
+            #     msg = f"[{to_session}] Creating table {target_table}"
+            # with ing(msg, suppress_log=multi_tables):
+            #     query = f'CREATE DATABASE IF NOT EXISTS {database};'
+            #     debug(query)
+            #     Athena.query(query, 'default')
+
+            #     query = f'DROP TABLE IF EXISTS {target_table};'
+            #     debug(query)
+            #     Athena.query(query, database)
+
+            #     athena_columns = ', '.join([f'{c} string' for c in columns.split(',')])
+            #     query = f'CREATE EXTERNAL TABLE IF NOT EXISTS {target_table}(\n' + \
+            #         f' {athena_columns})\n' + \
+            #         "ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'\n" + \
+            #         'WITH SERDEPROPERTIES (\n' + \
+            #         ' "separatorChar" = ",",\n' + \
+            #         ' "quoteChar" = "\\"")\n' + \
+            #         f"LOCATION 's3://{bucket}/export/{database}/{keyspace}/{target_table}'\n" + \
+            #         'TBLPROPERTIES ("skip.header.line.count"="1");'
+            #     debug(query)
+            #     try:
+            #         Athena.query(query, database)
+            #     except Exception as e:
+            #         log2(f'*** Failed query:\n{query}')
+            #         raise e
+
+            succeeded = True
+
+            return csv_file, to_session
+        finally:
+            if succeeded:
+                Athena.clear_cache()
+
+                if not multi_tables:
+                    with export_db(state) as dbs:
+                        dbs.sql(f'select * from {database}.{table} limit 10')
+
+    def create_schema(self, to_session: str, bucket: str, database: str, keyspace: str, table: str, columns: list[str], multi_tables: bool, create_db = False):
+        msg: str = None
+        if create_db:
+            msg = f"[{to_session}] Creating database {database}"
+        else:
+            msg = f"[{to_session}] Creating table {table}"
+
+        with ing(msg, suppress_log=multi_tables):
+            query = f'CREATE DATABASE IF NOT EXISTS {database};'
+            debug(query)
+            Athena.query(query, 'default')
+
+            query = f'DROP TABLE IF EXISTS {table};'
+            debug(query)
+            Athena.query(query, database)
+
+            athena_columns = ', '.join([f'{c} string' for c in columns.split(',')])
+            query = f'CREATE EXTERNAL TABLE IF NOT EXISTS {table}(\n' + \
+                f' {athena_columns})\n' + \
+                "ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'\n" + \
+                'WITH SERDEPROPERTIES (\n' + \
+                ' "separatorChar" = ",",\n' + \
+                ' "quoteChar" = "\\"")\n' + \
+                f"LOCATION 's3://{bucket}/export/{database}/{keyspace}/{table}'\n" + \
+                'TBLPROPERTIES ("skip.header.line.count"="1");'
+            debug(query)
+            try:
+                Athena.query(query, database)
+            except Exception as e:
+                log2(f'*** Failed query:\n{query}')
+                raise e