kaqing 2.0.14__py3-none-any.whl → 2.0.145__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kaqing might be problematic. Click here for more details.

Files changed (163)
  1. adam/apps.py +2 -2
  2. adam/batch.py +13 -3
  3. adam/checks/check_utils.py +4 -4
  4. adam/checks/compactionstats.py +1 -1
  5. adam/checks/cpu.py +2 -2
  6. adam/checks/disk.py +1 -1
  7. adam/checks/gossip.py +1 -1
  8. adam/checks/memory.py +3 -3
  9. adam/checks/status.py +1 -1
  10. adam/commands/alter_tables.py +81 -0
  11. adam/commands/app.py +3 -3
  12. adam/commands/app_ping.py +2 -2
  13. adam/commands/audit/audit.py +86 -0
  14. adam/commands/audit/audit_repair_tables.py +77 -0
  15. adam/commands/audit/audit_run.py +58 -0
  16. adam/commands/audit/show_last10.py +51 -0
  17. adam/commands/audit/show_slow10.py +50 -0
  18. adam/commands/audit/show_top10.py +48 -0
  19. adam/commands/audit/utils_show_top10.py +59 -0
  20. adam/commands/bash/__init__.py +0 -0
  21. adam/commands/bash/bash.py +133 -0
  22. adam/commands/bash/bash_completer.py +93 -0
  23. adam/commands/cat.py +56 -0
  24. adam/commands/cd.py +12 -82
  25. adam/commands/check.py +6 -0
  26. adam/commands/cli_commands.py +3 -3
  27. adam/commands/code.py +60 -0
  28. adam/commands/command.py +48 -12
  29. adam/commands/commands_utils.py +4 -5
  30. adam/commands/cql/__init__.py +0 -0
  31. adam/commands/cql/cql_completions.py +28 -0
  32. adam/commands/cql/cql_utils.py +209 -0
  33. adam/commands/{cqlsh.py → cql/cqlsh.py} +15 -10
  34. adam/commands/deploy/code_utils.py +2 -2
  35. adam/commands/deploy/deploy.py +8 -21
  36. adam/commands/deploy/deploy_frontend.py +1 -1
  37. adam/commands/deploy/deploy_pg_agent.py +3 -3
  38. adam/commands/deploy/deploy_pod.py +28 -27
  39. adam/commands/deploy/deploy_utils.py +16 -26
  40. adam/commands/deploy/undeploy.py +8 -21
  41. adam/commands/deploy/undeploy_frontend.py +1 -1
  42. adam/commands/deploy/undeploy_pg_agent.py +5 -3
  43. adam/commands/deploy/undeploy_pod.py +12 -10
  44. adam/commands/devices/__init__.py +0 -0
  45. adam/commands/devices/device.py +27 -0
  46. adam/commands/devices/device_app.py +146 -0
  47. adam/commands/devices/device_auit_log.py +43 -0
  48. adam/commands/devices/device_cass.py +145 -0
  49. adam/commands/devices/device_export.py +86 -0
  50. adam/commands/devices/device_postgres.py +109 -0
  51. adam/commands/devices/devices.py +25 -0
  52. adam/commands/export/__init__.py +0 -0
  53. adam/commands/export/clean_up_export_session.py +53 -0
  54. adam/commands/export/clean_up_export_sessions.py +40 -0
  55. adam/commands/export/drop_export_database.py +58 -0
  56. adam/commands/export/drop_export_databases.py +46 -0
  57. adam/commands/export/export.py +83 -0
  58. adam/commands/export/export_databases.py +170 -0
  59. adam/commands/export/export_select.py +85 -0
  60. adam/commands/export/export_select_x.py +54 -0
  61. adam/commands/export/export_use.py +55 -0
  62. adam/commands/export/exporter.py +364 -0
  63. adam/commands/export/import_session.py +68 -0
  64. adam/commands/export/importer.py +67 -0
  65. adam/commands/export/importer_athena.py +80 -0
  66. adam/commands/export/importer_sqlite.py +47 -0
  67. adam/commands/export/show_column_counts.py +63 -0
  68. adam/commands/export/show_export_databases.py +39 -0
  69. adam/commands/export/show_export_session.py +51 -0
  70. adam/commands/export/show_export_sessions.py +47 -0
  71. adam/commands/export/utils_export.py +291 -0
  72. adam/commands/help.py +12 -7
  73. adam/commands/issues.py +6 -0
  74. adam/commands/kubectl.py +41 -0
  75. adam/commands/login.py +7 -4
  76. adam/commands/logs.py +2 -1
  77. adam/commands/ls.py +4 -107
  78. adam/commands/medusa/medusa.py +2 -26
  79. adam/commands/medusa/medusa_backup.py +2 -2
  80. adam/commands/medusa/medusa_restore.py +3 -4
  81. adam/commands/medusa/medusa_show_backupjobs.py +4 -3
  82. adam/commands/medusa/medusa_show_restorejobs.py +3 -3
  83. adam/commands/nodetool.py +9 -4
  84. adam/commands/param_set.py +1 -1
  85. adam/commands/postgres/postgres.py +42 -43
  86. adam/commands/postgres/{postgres_session.py → postgres_context.py} +43 -42
  87. adam/commands/postgres/postgres_utils.py +31 -0
  88. adam/commands/postgres/psql_completions.py +10 -0
  89. adam/commands/preview_table.py +18 -40
  90. adam/commands/pwd.py +2 -28
  91. adam/commands/reaper/reaper.py +4 -24
  92. adam/commands/reaper/reaper_restart.py +1 -1
  93. adam/commands/reaper/reaper_session.py +2 -2
  94. adam/commands/repair/repair.py +3 -27
  95. adam/commands/repair/repair_log.py +1 -1
  96. adam/commands/repair/repair_run.py +2 -2
  97. adam/commands/repair/repair_scan.py +1 -1
  98. adam/commands/repair/repair_stop.py +1 -1
  99. adam/commands/report.py +6 -0
  100. adam/commands/restart.py +2 -2
  101. adam/commands/rollout.py +1 -1
  102. adam/commands/show/show.py +11 -26
  103. adam/commands/show/show_app_actions.py +3 -0
  104. adam/commands/show/show_app_id.py +1 -1
  105. adam/commands/show/show_app_queues.py +3 -2
  106. adam/commands/show/show_cassandra_status.py +3 -3
  107. adam/commands/show/show_cassandra_version.py +3 -3
  108. adam/commands/show/show_host.py +33 -0
  109. adam/commands/show/show_login.py +3 -0
  110. adam/commands/show/show_processes.py +1 -1
  111. adam/commands/show/show_repairs.py +2 -2
  112. adam/commands/show/show_storage.py +1 -1
  113. adam/commands/watch.py +1 -1
  114. adam/config.py +16 -3
  115. adam/embedded_params.py +1 -1
  116. adam/pod_exec_result.py +10 -2
  117. adam/repl.py +127 -117
  118. adam/repl_commands.py +51 -16
  119. adam/repl_state.py +276 -55
  120. adam/sql/__init__.py +0 -0
  121. adam/sql/sql_completer.py +120 -0
  122. adam/sql/sql_state_machine.py +617 -0
  123. adam/sql/term_completer.py +76 -0
  124. adam/sso/authn_ad.py +1 -1
  125. adam/sso/cred_cache.py +1 -1
  126. adam/sso/idp.py +1 -1
  127. adam/utils.py +83 -2
  128. adam/utils_athena.py +145 -0
  129. adam/utils_audits.py +102 -0
  130. adam/utils_k8s/__init__.py +0 -0
  131. adam/utils_k8s/app_clusters.py +33 -0
  132. adam/utils_k8s/app_pods.py +31 -0
  133. adam/{k8s_utils → utils_k8s}/cassandra_clusters.py +6 -21
  134. adam/{k8s_utils → utils_k8s}/cassandra_nodes.py +12 -5
  135. adam/{k8s_utils → utils_k8s}/deployment.py +2 -2
  136. adam/{k8s_utils → utils_k8s}/kube_context.py +1 -1
  137. adam/{k8s_utils → utils_k8s}/pods.py +119 -26
  138. adam/{k8s_utils → utils_k8s}/secrets.py +4 -0
  139. adam/{k8s_utils → utils_k8s}/statefulsets.py +5 -4
  140. adam/utils_net.py +24 -0
  141. adam/utils_repl/__init__.py +0 -0
  142. adam/utils_repl/automata_completer.py +48 -0
  143. adam/utils_repl/repl_completer.py +46 -0
  144. adam/utils_repl/state_machine.py +173 -0
  145. adam/utils_sqlite.py +101 -0
  146. adam/version.py +1 -1
  147. {kaqing-2.0.14.dist-info → kaqing-2.0.145.dist-info}/METADATA +1 -1
  148. kaqing-2.0.145.dist-info/RECORD +227 -0
  149. adam/commands/bash.py +0 -87
  150. adam/commands/cql_utils.py +0 -53
  151. adam/commands/devices.py +0 -89
  152. kaqing-2.0.14.dist-info/RECORD +0 -167
  153. /adam/{k8s_utils → commands/audit}/__init__.py +0 -0
  154. /adam/{k8s_utils → utils_k8s}/config_maps.py +0 -0
  155. /adam/{k8s_utils → utils_k8s}/custom_resources.py +0 -0
  156. /adam/{k8s_utils → utils_k8s}/ingresses.py +0 -0
  157. /adam/{k8s_utils → utils_k8s}/jobs.py +0 -0
  158. /adam/{k8s_utils → utils_k8s}/service_accounts.py +0 -0
  159. /adam/{k8s_utils → utils_k8s}/services.py +0 -0
  160. /adam/{k8s_utils → utils_k8s}/volumes.py +0 -0
  161. {kaqing-2.0.14.dist-info → kaqing-2.0.145.dist-info}/WHEEL +0 -0
  162. {kaqing-2.0.14.dist-info → kaqing-2.0.145.dist-info}/entry_points.txt +0 -0
  163. {kaqing-2.0.14.dist-info → kaqing-2.0.145.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,364 @@
1
+ from concurrent.futures import ThreadPoolExecutor, as_completed
2
+ from datetime import datetime
3
+ import functools
4
+ import re
5
+ import time
6
+ import traceback
7
+
8
+ from adam.commands.cql.cql_utils import cassandra_table_names, run_cql, table_spec
9
+ from adam.commands.export.export_databases import ExportDatabases
10
+ from adam.commands.export.importer import Importer
11
+ from adam.commands.export.importer_athena import AthenaImporter
12
+ from adam.commands.export.importer_sqlite import SqliteImporter
13
+ from adam.commands.export.utils_export import ExportSpec, ExportTableStatus, ExportTableSpec, ImportSpec, csv_dir, find_files
14
+ from adam.config import Config
15
+ from adam.pod_exec_result import PodExecResult
16
+ from adam.repl_state import ReplState
17
+ from adam.utils import elapsed_time, log2, ing
18
+ from adam.utils_k8s.cassandra_nodes import CassandraNodes
19
+ from adam.utils_k8s.pods import log_prefix
20
+ from adam.utils_k8s.statefulsets import StatefulSets
21
+
22
class Exporter:
    """Export Cassandra tables to CSV on a pod and import them into an export database.

    All functions are stateless and are called on the class itself
    (``Exporter.export_tables(...)``). Progress of every table is tracked by a
    log file named ``{log_prefix()}-{session}_{keyspace}.{target_table}.log``
    whose suffix (none, ``.pending_import``, ``.done``) encodes the state.
    """

    @staticmethod
    def export_tables(args: list[str], state: ReplState, export_only: bool = False, max_workers = 0) -> tuple[list[str], ExportSpec]:
        """Export the tables described by ``args`` in two passes.

        The first pass (``export_state='init'``) creates an empty log file per
        table; the second pass (``'pending_export'``) dumps to CSV and, unless
        ``export_only`` is set or the importer is ``csv``, imports the result.

        Returns:
            ``(statuses, spec)``; ``([], None)`` when an exception was raised
            (the exception is logged, not propagated).
        """
        if export_only:
            log2('export-only for testing')

        try:
            spec = Exporter.export_spec(' '.join(args), state)

            statuses, spec = Exporter._export_tables(spec, state, max_workers=max_workers, export_state='init')
            if not statuses:
                return statuses, spec

            return Exporter._export_tables(spec, state, export_only, max_workers, 'pending_export')
        except Exception as e:
            Exporter._log_exception(e)

        return [], None

    @staticmethod
    def export_spec(spec_str: str, state: ReplState):
        """Parse an export spec and settle its importer and session name.

        With an active ``state.export_session`` the importer must match the one
        encoded in the session name, except that a ``csv`` export is allowed and
        reuses the session name with the csv prefix. Without an active session
        a new timestamp-based session name is generated and, for non-csv
        importers, stored on ``state.export_session``.

        Raises:
            Exception: when the requested importer conflicts with the active session.
        """
        spec: ExportSpec = ExportSpec.parse_specs(spec_str)

        session = state.export_session
        if session:
            if spec.importer:
                importer_from_session = Importer.importer_from_session(session)
                if spec.importer != importer_from_session:
                    if spec.importer == 'csv':
                        # same session id, but with the csv prefix in place of the first character
                        prefix = Importer.prefix_from_importer(spec.importer)
                        session = f'{prefix}{session[1:]}'
                    else:
                        raise Exception(f"You're currently using {importer_from_session} export database. You cannot export tables with {spec.importer} type database.")
            else:
                spec.importer = Importer.importer_from_session(session)
        else:
            if not spec.importer:
                spec.importer = Config().get('export.default-importer', 'sqlite')

            prefix = Importer.prefix_from_importer(spec.importer)
            # the first three characters of the timestamp are dropped to keep the name short
            session = f'{prefix}{datetime.now().strftime("%Y%m%d%H%M%S")[3:]}'
            if spec.importer != 'csv':
                state.export_session = session

        spec.session = session

        return spec

    @staticmethod
    def import_session(args: list[str], state: ReplState, max_workers = 0) -> tuple[list[str], ExportSpec]:
        """Import the CSV files of an existing export session.

        Returns:
            ``(statuses, spec)``; ``([], spec-without-tables)`` when the session
            is already completely done; ``([], None)`` when an exception was
            raised (it is logged, or printed with traceback in debug mode).
        """
        try:
            import_spec = Exporter.import_spec(' '.join(args), state)
            tables, status_in_whole = ExportTableStatus.from_session(state.sts, state.pod, state.namespace, import_spec.session)
            if status_in_whole == 'done':
                log2('The session has been completely done - no more csv files are found.')
                return [], ExportSpec(None, None, importer=import_spec.importer, tables=[])

            spec = ExportSpec(None, None, importer=import_spec.importer, tables=[ExportTableSpec.from_status(table) for table in tables], session=import_spec.session)

            return Exporter._export_tables(spec, state, max_workers=max_workers)
        except Exception as e:
            Exporter._log_exception(e)

        return [], None

    @staticmethod
    def import_spec(spec_str: str, state: ReplState):
        """Parse an import spec and settle which importer to use.

        With an active ``state.export_session`` the importer must match the
        session's. Without one, the importer comes from the spec or from the
        prefix of the session being imported (``csv`` falls back to the
        configured default), and ``state.export_session`` is set to the
        imported session name re-prefixed for that importer.

        Raises:
            Exception: on an importer mismatch, or when Athena is requested
                but no AWS credentials are available.
        """
        spec: ImportSpec = ImportSpec.parse_specs(spec_str)

        if state.export_session:
            current = Importer.importer_from_session(state.export_session)
            if spec.importer:
                if spec.importer != current:
                    raise Exception(f"You're currently using {current} export database. You cannot import to {spec.importer} type database.")
            else:
                spec.importer = current
                if not spec.importer:
                    spec.importer = Config().get('export.default-importer', 'sqlite')
        else:
            if spec.importer:
                # Athena credentials are only relevant when Athena is the target.
                if spec.importer == 'athena' and not AthenaImporter.ping():
                    raise Exception('Credentials for Athena are not present.')
            else:
                spec.importer = Importer.importer_from_session(spec.session)

            if spec.importer == 'csv':
                spec.importer = Config().get('export.default-importer', 'sqlite')

            prefix = Importer.prefix_from_importer(spec.importer)
            state.export_session = f'{prefix}{spec.session[1:]}'

        return spec

    @staticmethod
    def _export_tables(spec: ExportSpec, state: ReplState, export_only = False, max_workers = 0, export_state = None) -> tuple[list[str], ExportSpec]:
        """Run ``export_table`` for every table in ``spec``, in parallel when possible.

        Fills in defaults on ``spec`` (keyspace ``{namespace}_db``, all tables
        of the keyspace). For ``export_state == 'init'`` the session's CSV
        directories are removed first. In the parallel case the statuses are
        returned in completion order, not in table order.
        """
        if not spec.keyspace:
            spec.keyspace = f'{state.namespace}_db'

        if not spec.tables:
            spec.tables = [ExportTableSpec.parse(t) for t in cassandra_table_names(state, keyspace=spec.keyspace)]

        if not max_workers:
            max_workers = Config().action_workers(f'export.{spec.importer}', 8)

        if export_state == 'init':
            CassandraNodes.exec(state.pod, state.namespace, f'rm -rf {csv_dir()}/{spec.session}_*', show_out=Config().is_debug(), shell='bash')

        if max_workers > 1 and len(spec.tables) > 1:
            log2(f'Executing on {len(spec.tables)} Cassandra tables in parallel...')
            start_time = time.time()
            try:
                with ThreadPoolExecutor(max_workers=max_workers) as executor:
                    futures = [
                        executor.submit(Exporter.export_table, table, state, spec.session, spec.importer, export_only, True, consistency=spec.consistency, export_state=export_state)
                        for table in spec.tables
                    ]

                    return [future.result() for future in as_completed(futures)], spec
            finally:
                log2(f"{len(spec.tables)} parallel table export elapsed time: {elapsed_time(start_time)} with {max_workers} workers")

        multi_tables = len(spec.tables) > 1
        return [
            Exporter.export_table(table, state, spec.session, spec.importer, export_only, multi_tables=multi_tables, consistency=spec.consistency, export_state=export_state)
            for table in spec.tables
        ], spec

    @staticmethod
    def export_table(spec: ExportTableSpec, state: ReplState, session: str, importer: str, export_only = False, multi_tables = True, consistency: str = None, export_state=None):
        """Drive one table through the export/import state machine.

        Returns:
            ``'table_log_created'`` for the ``'init'`` pass, ``'pending_import'``
            when stopping after the CSV export, the final status (``'done'``)
            otherwise, or ``None`` when the columns could not be resolved or no
            log file exists for the table.
        """
        table, target_table, columns = Exporter.resove_table_n_columns(spec, state, include_ks_in_target=False, importer=importer)
        if not columns:
            # resove_table_n_columns has already logged the error; there is nothing to export
            return None

        log_file = f'{log_prefix()}-{session}_{spec.keyspace}.{target_table}.log'
        # decided up front because importing may set state.export_session
        create_db = not state.export_session

        if export_state == 'init':
            Exporter.create_table_log(spec, state, session, table, target_table)
            return 'table_log_created'

        if export_state == 'pending_export':
            Exporter.export_to_csv(spec, state, session, table, target_table, columns, multi_tables=multi_tables, consistency=consistency)

        log_files: list[str] = find_files(state.pod, state.namespace, f'{log_file}*')
        if not log_files:
            return None

        log_file = log_files[0]

        # NOTE(review): a status not handled below is re-polled immediately with
        # no delay - confirm from_log_file cannot keep returning such a status.
        status: ExportTableStatus = ExportTableStatus.from_log_file(state.pod, state.namespace, session, log_file)
        while status.status != 'done':
            if status.status == 'export_in_pregress':
                if Config().is_debug():
                    log2('Exporting to CSV is still in progess, sleeping for 1 sec...')
                time.sleep(1)
            elif status.status == 'exported':
                log_file = Exporter.rename_to_pending_import(spec, state, session, target_table)
                if importer == 'csv' or export_only:
                    return 'pending_import'
            elif status.status == 'pending_import':
                log_file, session = Exporter.import_from_csv(spec, state, session, importer, table, target_table, columns, multi_tables=multi_tables, create_db=create_db)

            status = ExportTableStatus.from_log_file(state.pod, state.namespace, session, log_file)

        return status.status

    @staticmethod
    def create_table_log(spec: ExportTableSpec, state: ReplState, session: str, table: str, target_table: str):
        """Remove any existing log files of the table and create an empty one. Returns ``table``."""
        log_file = f'{log_prefix()}-{session}_{spec.keyspace}.{target_table}.log'

        CassandraNodes.exec(state.pod, state.namespace, f'rm -f {log_file}* && touch {log_file}', show_out=Config().is_debug(), shell='bash')

        return table

    @staticmethod
    def export_to_csv(spec: ExportTableSpec, state: ReplState, session: str, table: str, target_table: str, columns: str, multi_tables = True, consistency: str = None):
        """Start a background CQL ``COPY ... TO`` of the table into its CSV file.

        The CQL output goes to the table's log file, whose path is returned.
        """
        db = f'{session}_{target_table}'

        CassandraNodes.exec(state.pod, state.namespace, f'mkdir -p {csv_dir()}/{db}', show_out=Config().is_debug(), shell='bash')
        csv_file = f'{csv_dir()}/{db}/{table}.csv'
        log_file = f'{log_prefix()}-{session}_{spec.keyspace}.{target_table}.log'

        suppress_ing_log = Config().is_debug() or multi_tables
        queries = []
        if consistency:
            queries.append(f'CONSISTENCY {consistency}')
        queries.append(f"COPY {spec.keyspace}.{table}({columns}) TO '{csv_file}' WITH HEADER = TRUE")
        ing(
            f'[{session}] Dumping table {spec.keyspace}.{table}{f" with consistency {consistency}" if consistency else ""}',
            lambda: run_cql(state, ';'.join(queries), show_out=Config().is_debug(), background=True, log_file=log_file),
            suppress_log=suppress_ing_log)

        return log_file

    @staticmethod
    def rename_to_pending_import(spec: ExportTableSpec, state: ReplState, session: str, target_table: str):
        """Rename the table's log file to ``*.log.pending_import`` and return the new path."""
        log_file = f'{log_prefix()}-{session}_{spec.keyspace}.{target_table}.log'
        to = f'{log_file}.pending_import'

        CassandraNodes.exec(state.pod, state.namespace, f'mv {log_file} {to}', show_out=Config().is_debug(), shell='bash')

        return to

    @staticmethod
    def import_from_csv(spec: ExportTableSpec, state: ReplState, session: str, importer: str, table: str, target_table: str, columns: str, multi_tables = True, create_db = False):
        """Import the table's CSV with the Athena importer, or with SQLite for any other importer.

        The target session is ``state.export_session``; the source session is
        ``session``, falling back to ``state.export_session`` when empty.
        Returns whatever the importer returns (``(log_file, session)``).
        """
        im = AthenaImporter() if importer == 'athena' else SqliteImporter()
        from_session = session or state.export_session
        return im.import_from_csv(state.pod, state.namespace, state.export_session, from_session, spec.keyspace, table, target_table, columns, multi_tables, create_db)

    @staticmethod
    def clear_export_session_cache():
        """Drop the cached results of the two session lookup functions."""
        Exporter.find_export_sessions.cache_clear()
        Exporter.export_session_names.cache_clear()

    @staticmethod
    def _log_exception(e: Exception):
        """Print the full traceback in debug mode, otherwise log just the exception."""
        if Config().is_debug():
            traceback.print_exception(e)
        else:
            log2(e)

    @staticmethod
    def _first_pod(sts: str, namespace: str):
        """Return the first pod name of the statefulset, or None when it has no pods."""
        pods = StatefulSets.pod_names(sts, namespace)
        return pods[0] if pods else None

    @staticmethod
    @functools.lru_cache()
    def export_session_names(sts: str, pod: str, namespace: str, importer: str = None, export_state = None):
        """List export session names, optionally filtered by importer and state.

        Results are cached; call ``clear_export_session_cache`` to refresh.
        Returns an empty list when no statefulset, namespace or pod is available.
        """
        if not sts or not namespace:
            return []

        if not pod:
            pod = Exporter._first_pod(sts, namespace)

        if not pod:
            return []

        return [session for session, state in Exporter.find_export_sessions(pod, namespace, importer).items() if not export_state or state == export_state]

    @staticmethod
    @functools.lru_cache()
    def find_export_sessions(pod: str, namespace: str, importer: str = None, limit = 100):
        """Map export session names to their state, derived from log file names on the pod.

        The state is the log file suffix without dots (``pending_import``,
        ``done``) or ``in_export`` when there is no suffix. A session counts as
        ``done`` only if every one of its inspected log files is done. At most
        ``limit`` log files are inspected. Results are cached.
        """
        sessions: dict[str, str] = {}

        prefix = Importer.prefix_from_importer(importer)

        log_files: list[str] = find_files(pod, namespace, f'{log_prefix()}-{prefix}*_*.log*')

        if not log_files:
            return {}

        # session = text between the prefix and the first underscore; state = text after '.log'
        pattern = re.compile(rf'{re.escape(log_prefix())}-(.*?)_.*\.log(.*)')
        for log_file in log_files[:limit]:
            m = pattern.match(log_file)
            if not m:
                continue

            s = m.group(1)
            suffix = m.group(2)  # '', '.pending_import', '.done'
            state = suffix.strip('.') if suffix else 'in_export'

            # the first state seen wins, except that any unfinished table overrides 'done'
            if s not in sessions or (sessions[s] == 'done' and state != 'done'):
                sessions[s] = state

        return sessions

    @staticmethod
    def clean_up_all_sessions(sts: str, pod: str, namespace: str):
        """Remove all CSV directories and all export log files on the pod.

        Returns:
            True when the clean-up commands were issued, False when no
            statefulset, namespace or pod is available.
        """
        if not sts or not namespace:
            return False

        if not pod:
            pod = Exporter._first_pod(sts, namespace)

        if not pod:
            return False

        CassandraNodes.exec(pod, namespace, f'rm -rf {csv_dir()}/*', show_out=Config().is_debug(), shell='bash')
        CassandraNodes.exec(pod, namespace, f'rm -rf {log_prefix()}-*.log*', show_out=Config().is_debug(), shell='bash')

        return True

    @staticmethod
    def clean_up_sessions(sts: str, pod: str, namespace: str, sessions: list[str], max_workers = 0):
        """Clean up several sessions, in parallel when more than one worker is allowed.

        Returns:
            A list of ``(csv_cnt, log_cnt)`` tuples, one per session (in
            completion order for the parallel case).
        """
        if not sessions:
            return []

        if not max_workers:
            max_workers = Config().action_workers('export', 8)

        if max_workers > 1 and len(sessions) > 1:
            log2(f'Executing on {len(sessions)} export session clean ups in parallel...')
            start_time = time.time()
            try:
                with ThreadPoolExecutor(max_workers=max_workers) as executor:
                    futures = [executor.submit(Exporter.clean_up_session, sts, pod, namespace, session, True) for session in sessions]

                    return [future.result() for future in as_completed(futures)]
            finally:
                log2(f"{len(sessions)} parallel session clean ups elapsed time: {elapsed_time(start_time)} with {max_workers} workers")

        return [Exporter.clean_up_session(sts, pod, namespace, session) for session in sessions]

    @staticmethod
    def clean_up_session(sts: str, pod: str, namespace: str, session: str, multi_tables = True):
        """Remove the CSV directories and log files of one session.

        Returns:
            ``(csv_cnt, log_cnt)``: number of CSV directories and log files
            for which a removal command was issued.
        """
        if not sts or not namespace:
            return 0, 0

        if not pod:
            pod = Exporter._first_pod(sts, namespace)

        if not pod:
            return 0, 0

        csv_cnt = 0
        log_cnt = 0

        log_files: list[str] = find_files(pod, namespace, f'{log_prefix()}-{session}_*.log*')

        # group(1) = keyspace, group(2) = target table
        pattern = re.compile(rf'{re.escape(log_prefix())}-{re.escape(session)}_(.*?)\.(.*?)\.log.*')
        for log_file in log_files or []:
            m = pattern.match(log_file)
            if m:
                table = m.group(2)

                CassandraNodes.exec(pod, namespace, f'rm -rf {csv_dir()}/{session}_{table}', show_out=not multi_tables, shell='bash')
                csv_cnt += 1

            CassandraNodes.exec(pod, namespace, f'rm -rf {log_file}', show_out=not multi_tables, shell='bash')
            log_cnt += 1

        return csv_cnt, log_cnt

    @staticmethod
    def resove_table_n_columns(spec: ExportTableSpec, state: ReplState, include_ks_in_target = False, importer = 'sqlite'):
        """Resolve the source table, target table and column list of a table spec.

        Columns default to the ``export.{importer}.columns`` setting
        (``<keys>`` if unset). The placeholders ``<keys>``, ``<row-key>`` and
        ``*`` are expanded from the table's schema.

        Returns:
            ``(table, target_table, columns)``; ``(table, None, None)`` when no
            columns could be resolved (an error is logged).
        """
        table = spec.table
        columns = spec.columns
        if not columns:
            columns = Config().get(f'export.{importer}.columns', '<keys>')

        keyspaced_table = f'{spec.keyspace}.{spec.table}'
        if columns == '<keys>':
            columns = ','.join(table_spec(state, keyspaced_table, on_any=True).keys())
        elif columns == '<row-key>':
            columns = table_spec(state, keyspaced_table, on_any=True).row_key()
        elif columns == '*':
            columns = ','.join([c.name for c in table_spec(state, keyspaced_table, on_any=True).columns])

        if not columns:
            log2(f'ERROR: Empty columns on {table}.')
            return table, None, None

        target_table = spec.target_table if spec.target_table else table
        if not include_ks_in_target and '.' in target_table:
            # keep only the part after the last dot
            target_table = target_table.split('.')[-1]

        return table, target_table, columns

    @staticmethod
    def drop_databases(sts: str, pod: str, namespace: str, db: str = None):
        """Drop export database(s) and clean up the completed sessions that belonged to them.

        Only sessions that are both in state ``done`` and among the dropped
        databases are cleaned up; the session caches are cleared afterwards.
        """
        importer = Importer.importer_from_session(db) if db else None

        sessions_done = Exporter.export_session_names(sts, pod, namespace, importer=importer, export_state='done')
        sessions = ExportDatabases.sessions_from_dbs(ExportDatabases.drop_export_dbs(db))
        if sessions_done and sessions:
            intersects = list(set(sessions_done) & set(sessions))
            with ing(f'Cleaning up {len(intersects)} completed sessions'):
                Exporter.clean_up_sessions(sts, pod, namespace, intersects)
                Exporter.clear_export_session_cache()
@@ -0,0 +1,68 @@
1
+ from adam.commands.command import Command
2
+ from adam.commands.export.export_databases import ExportDatabases
3
+ from adam.commands.export.exporter import Exporter
4
+ from adam.repl_state import ReplState, RequiredState
5
+ from adam.utils import log, log2
6
+ from adam.utils_k8s.statefulsets import StatefulSets
7
+
8
class ImportSession(Command):
    """REPL/CLI command ``import session``: import the CSV files of an export session."""

    COMMAND = 'import session'

    # the singleton pattern
    def __new__(cls, *args, **kwargs):
        if not hasattr(cls, 'instance'):
            cls.instance = super(ImportSession, cls).__new__(cls)

        return cls.instance

    def __init__(self, successor: Command=None):
        super().__init__(successor)

    def command(self):
        """Return the command text this handler responds to."""
        return ImportSession.COMMAND

    def required(self):
        """Return the state required to run the command: a cluster or a pod."""
        return RequiredState.CLUSTER_OR_POD

    def run(self, cmd: str, state: ReplState):
        """Run the import for the session named in ``cmd``.

        Returns:
            The result of the successor when ``cmd`` is not for this command,
            ``'command-missing'`` when no session name is given, otherwise
            the state.
        """
        if not(args := self.args(cmd)):
            return super().run(cmd, state)

        state, args = self.apply_state(args, state)
        if not self.validate_state(state):
            return state

        if not args:
            if state.in_repl:
                log2('Specify export session name.')
            else:
                log2('* Export session name is missing.')

                Command.display_help()

            return 'command-missing'

        # Only undo the temporary pod selection if it was made here, so that
        # push/pop stay balanced even when the pod lookup itself fails.
        pushed = False
        try:
            if not state.pod:
                state.push()
                pushed = True
                state.pod = StatefulSets.pod_names(state.sts, state.namespace)[0]

            tables, _ = Exporter.import_session(args, state)
            if tables:
                Exporter.clear_export_session_cache()

                log()
                ExportDatabases.display_export_db(state.export_session)
        finally:
            if pushed:
                state.pop()

        return state

    def completion(self, state: ReplState):
        """Warm up the export session caches; this command adds no completions itself."""
        # warm up cache
        Exporter.export_session_names(state.sts, state.pod, state.namespace)
        Exporter.export_session_names(state.sts, state.pod, state.namespace, export_state='pending_import')

        return {}

    def help(self, _: ReplState):
        """Return the one-line usage text."""
        return f'{ImportSession.COMMAND} <export-session-name>\t import files in session to Athena or SQLite'
@@ -0,0 +1,67 @@
1
+ from abc import abstractmethod
2
+
3
+ from adam.commands.export.utils_export import csv_dir
4
+ from adam.config import Config
5
+ from adam.utils import ing
6
+ from adam.utils_k8s.cassandra_nodes import CassandraNodes
7
+ from adam.utils_k8s.pods import log_prefix
8
+
9
class Importer:
    """Base class of the importers that load an exported CSV file into a database.

    Session names start with a one-letter prefix that identifies the importer:
    ``s`` for sqlite, ``e`` for athena, ``c`` for csv.
    """

    @abstractmethod
    def prefix(self):
        """Return the one-letter session prefix of this importer."""
        pass

    @abstractmethod
    def import_from_csv(self, pod: str, namespace: str, to_session: str, from_session: str, keyspace: str, table: str, target_table: str, columns: str, multi_tables = True, create_db = False):
        """Import the CSV file of ``table`` from ``from_session`` into the database of ``to_session``."""
        pass

    def move_to_done(self, pod: str, namespace: str, to_session: str, from_session: str, keyspace: str, target_table: str):
        """Rename the table's ``.log.pending_import`` file to ``.log.done`` under ``to_session``.

        Returns:
            ``(new_log_file, to_session)``.
        """
        log_file = f'{log_prefix()}-{from_session}_{keyspace}.{target_table}.log.pending_import'

        to = f'{log_prefix()}-{to_session}_{keyspace}.{target_table}.log.done'

        CassandraNodes.exec(pod, namespace, f'mv {log_file} {to}', show_out=Config().is_debug(), shell='bash')

        return to, to_session

    def prefix_adjusted_session(self, session: str):
        """Return ``session`` with its first character replaced by this importer's prefix, if it differs."""
        own_prefix = self.prefix()
        if session.startswith(own_prefix):
            return session

        return f'{own_prefix}{session[1:]}'

    def remove_csv(self, pod: str, namespace: str, session: str, table: str, target_table: str, multi_tables = True):
        """Delete the table's CSV file on the pod."""
        with ing(f'[{session}] Cleaning up temporary files', suppress_log=multi_tables):
            CassandraNodes.exec(pod, namespace, f'rm -rf {self.csv_file(session, table, target_table)}', show_out=Config().is_debug(), shell='bash')

    def db(self, session: str, keyspace: str):
        """Return the database name used for a session and keyspace."""
        return f'{session}_{keyspace}'

    def csv_file(self, session: str, table: str, target_table: str):
        """Return the path of the table's CSV file on the pod."""
        return f'{csv_dir()}/{session}_{target_table}/{table}.csv'

    @staticmethod
    def prefix_from_importer(importer: str = ''):
        """Return the session prefix for an importer name.

        ``''`` for an empty importer, ``'e'`` for athena, ``'c'`` for csv and
        ``'s'`` for anything else.
        """
        if not importer:
            return ''

        return {'athena': 'e', 'csv': 'c'}.get(importer, 's')

    @staticmethod
    def importer_from_session(session: str):
        """Return the importer name encoded in a session name's first letter.

        ``None`` for an empty session, ``'sqlite'`` for ``s``, ``'athena'`` for
        ``e`` and ``'csv'`` for anything else.
        """
        if not session:
            return None

        if session.startswith('s'):
            return 'sqlite'

        if session.startswith('e'):
            return 'athena'

        return 'csv'
@@ -0,0 +1,80 @@
1
+ import boto3
2
+
3
+ from adam.commands.export.importer import Importer
4
+ from adam.commands.export.utils_export import GeneratorStream
5
+ from adam.config import Config
6
+ from adam.utils import log2, ing
7
+ from adam.utils_athena import Athena
8
+ from adam.utils_k8s.pods import Pods
9
+
10
class AthenaImporter(Importer):
    """Importer that uploads an exported CSV file to S3 and exposes it as an Athena table."""

    @staticmethod
    def ping():
        """Return True when boto3 can find AWS credentials."""
        session = boto3.session.Session()
        credentials = session.get_credentials()

        return credentials is not None

    def prefix(self):
        """Return the session prefix of Athena sessions."""
        return 'e'

    def import_from_csv(self, pod: str, namespace: str, to_session: str, from_session: str, keyspace: str, table: str, target_table: str, columns: str, multi_tables = True, create_db = False):
        """Upload the table's CSV file to S3 and (re)create the Athena table on top of it.

        After a successful import the log file is moved to done, the CSV file
        is removed from the pod and the Athena cache is cleared. When
        ``multi_tables`` is false the first 10 rows of the new table are
        queried for display. Any failure propagates and skips the clean-up.

        Returns:
            ``(done_log_file, to_session)``.
        """
        csv_file = self.csv_file(from_session, table, target_table)
        db = self.db(to_session, keyspace)

        bucket = Config().get('export.bucket', 'c3.ops--qing')

        with ing(f'[{to_session}] Uploading to S3', suppress_log=multi_tables):
            csv_stream = Pods.read_file(pod, 'cassandra', namespace, csv_file)

            s3 = boto3.client('s3')
            s3.upload_fileobj(GeneratorStream(csv_stream), bucket, f'export/{db}/{keyspace}/{target_table}/{table}.csv')

        if create_db:
            msg = f"[{to_session}] Creating database {db}"
        else:
            msg = f"[{to_session}] Creating table {target_table}"

        with ing(msg, suppress_log=multi_tables):
            query = f'CREATE DATABASE IF NOT EXISTS {db};'
            if Config().is_debug():
                log2(query)
            Athena.query(query, 'default')

            query = f'DROP TABLE IF EXISTS {target_table};'
            if Config().is_debug():
                log2(query)
            Athena.query(query, db)

            # every column is declared as a string
            athena_columns = ', '.join([f'{c} string' for c in columns.split(',')])
            query = (
                f'CREATE EXTERNAL TABLE IF NOT EXISTS {target_table}(\n'
                f' {athena_columns})\n'
                "ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'\n"
                'WITH SERDEPROPERTIES (\n'
                ' "separatorChar" = ",",\n'
                ' "quoteChar" = "\\"")\n'
                f"LOCATION 's3://{bucket}/export/{db}/{keyspace}/{target_table}'\n"
                'TBLPROPERTIES ("skip.header.line.count"="1");'
            )
            if Config().is_debug():
                log2(query)
            try:
                Athena.query(query, db)
            except Exception:
                log2(f'*** Failed query:\n{query}')
                raise

        to, _ = self.move_to_done(pod, namespace, to_session, from_session, keyspace, target_table)

        # reached only when everything above succeeded
        self.remove_csv(pod, namespace, from_session, table, target_table, multi_tables)
        Athena.clear_cache()

        if not multi_tables:
            query = f'select * from {target_table} limit 10'
            log2(query)
            Athena.run_query(query, db)

        return to, to_session
@@ -0,0 +1,47 @@
1
+ import os
2
+ import sqlite3
3
+ import pandas
4
+
5
+ from adam.commands.export.importer import Importer
6
+ from adam.commands.export.utils_export import GeneratorStream
7
+ from adam.utils import log2, ing
8
+ from adam.utils_k8s.pods import Pods
9
+ from adam.utils_sqlite import SQLite
10
+
11
class SqliteImporter(Importer):
    """Importer that loads an exported CSV file into a local SQLite database."""

    def prefix(self):
        """Return the session prefix of SQLite sessions."""
        return 's'

    def import_from_csv(self, pod: str, namespace: str, to_session: str, from_session: str, keyspace: str, table: str, target_table: str, columns: str, multi_tables = True, create_db = False):
        """Read the table's CSV file from the pod and write it to a local SQLite table.

        The database file is ``{SQLite.local_db_dir()}/{to_session}_{keyspace}.db``;
        an existing table of the same name is replaced. After a successful
        import the log file is moved to done, the CSV file is removed from the
        pod and the SQLite cache is cleared. When ``multi_tables`` is false
        the first 10 rows of the new table are queried for display. The
        connection is closed in every case.

        Returns:
            ``(done_log_file, to_session)``.
        """
        csv_file = self.csv_file(from_session, table, target_table)
        db = self.db(to_session, keyspace)

        conn = None
        try:
            os.makedirs(SQLite.local_db_dir(), exist_ok=True)
            conn = sqlite3.connect(f'{SQLite.local_db_dir()}/{db}.db')

            with ing(f'[{to_session}] Uploading to Sqlite', suppress_log=multi_tables):
                csv_stream = Pods.read_file(pod, 'cassandra', namespace, csv_file)
                df = pandas.read_csv(GeneratorStream(csv_stream))

                df.to_sql(target_table, conn, index=False, if_exists='replace')

            to, _ = self.move_to_done(pod, namespace, to_session, from_session, keyspace, target_table)

            # reached only when everything above succeeded
            self.remove_csv(pod, namespace, from_session, table, target_table, multi_tables)
            SQLite.clear_cache()

            if not multi_tables:
                query = f'select * from {target_table} limit 10'
                log2(query)
                SQLite.run_query(query, conn_passed=conn)

            return to, to_session
        finally:
            # close even when the import or the clean-up steps raise
            if conn is not None:
                conn.close()