py-pve-cloud-backup 0.5.11__py3-none-any.whl

@@ -0,0 +1,415 @@
import logging
import subprocess
import time
import os
import shutil
from tinydb import TinyDB, Query
import json
import paramiko
import base64
import re
import pickle
import uuid
from kubernetes import client
from kubernetes.client.rest import ApiException
from pprint import pformat
import fnmatch


logger = logging.getLogger("bdd")

os.environ["BORG_UNKNOWN_UNENCRYPTED_REPO_ACCESS_IS_OK"] = "yes"  # prevent the borg CLI from prompting interactively
os.environ["BORG_RELOCATED_REPO_ACCESS_IS_OK"] = "yes"

ENV = os.getenv("ENV", "TESTING")

# constants
BACKUP_BASE_DIR = os.getenv("BACKUP_BASE_DIR", "/tmp/pve-cloud-test-backup")

IMAGE_META_DB_PATH = f"{BACKUP_BASE_DIR}/image-meta-db.json"
STACK_META_DB_PATH = f"{BACKUP_BASE_DIR}/stack-meta-db.json"


def group_image_metas(metas, type_keys, group_key, stack_filter=None):
    metas_grouped = {}

    # group metas by the given group key (e.g. vmid or namespace)
    for meta in metas:
        logger.debug(f"meta {meta}")

        # kind of redundant right now since we only process k8s
        if meta["type"] not in type_keys:
            continue  # skip non-matching types

        if stack_filter and meta["stack"] != stack_filter:
            continue  # skip filtered-out stacks

        if meta[group_key] not in metas_grouped:
            metas_grouped[meta[group_key]] = []

        metas_grouped[meta[group_key]].append(meta)

    return metas_grouped
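

# Illustrative sketch (not part of the module) of how group_image_metas
# behaves; the meta dicts here are assumed shapes, not real backup data:
#
#   metas = [
#       {"type": "k8s", "stack": "prod", "namespace": "db", "image_name": "csi-vol-1"},
#       {"type": "k8s", "stack": "prod", "namespace": "web", "image_name": "csi-vol-2"},
#   ]
#   group_image_metas(metas, type_keys=["k8s"], group_key="namespace")
#   # -> {"db": [<first meta>], "web": [<second meta>]}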


# helpers to convert the python k8s client's snake_case keys to the camelCase
# the Kubernetes API expects
def to_camel_case(snake_str):
    components = snake_str.split('_')
    return components[0] + ''.join(x.title() for x in components[1:])


# recursively applies to_camel_case to all keys of nested dicts and lists
def convert_keys_to_camel_case(obj):
    if isinstance(obj, dict):
        new_dict = {}
        for key, value in obj.items():
            new_key = to_camel_case(key)
            new_dict[new_key] = convert_keys_to_camel_case(value)
        return new_dict
    elif isinstance(obj, list):
        return [convert_keys_to_camel_case(item) for item in obj]
    else:
        return obj
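

# For illustration (not from the package): the conversion maps dumped
# python-client dicts back to API-style keys, e.g.:
#
#   convert_keys_to_camel_case({"storage_class_name": "rbd", "volume_name": "pvc-123"})
#   # -> {"storageClassName": "rbd", "volumeName": "pvc-123"}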


def restore_pvcs(metas_grouped, namespace_secret_dict, args, api_client):
    core_v1 = client.CoreV1Api(api_client=api_client)
    apps_v1 = client.AppsV1Api(api_client=api_client)
    storage_v1 = client.StorageV1Api(api_client=api_client)

    # get ceph storage classes
    ceph_storage_classes = {sc.metadata.name: sc for sc in storage_v1.list_storage_class().items if sc.provisioner == 'rbd.csi.ceph.com'}

    # load existing ceph pools and fetch their ids, needed for pv restoring later
    ls_call = subprocess.run(["ceph", "osd", "pool", "ls", "detail", "-f", "json"], check=True, text=True, capture_output=True)
    pool_details = json.loads(ls_call.stdout)

    pool_name_id = {}
    for pool_detail in pool_details:
        pool_name_id[pool_detail["pool_name"]] = pool_detail["pool_id"]
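
    # The two fields used above are a small subset of what `ceph osd pool ls
    # detail -f json` returns; abridged example shape (illustrative only):
    #   [{"pool_id": 2, "pool_name": "rbd.k8s", ...}, ...]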

    # get the cluster id from the ceph-csi namespace
    ceph_csi_config = core_v1.read_namespaced_config_map(name="ceph-csi-config", namespace="ceph-csi")

    if not ceph_csi_config:
        raise Exception("Could not find ceph-csi-config config map in ceph-csi namespace")

    ceph_cluster_id = json.loads(ceph_csi_config.data.get("config.json"))[0]["clusterID"]
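
    # config.json in that config map is a list of cluster entries; the code
    # assumes the first entry is ours. Roughly (illustrative, abridged):
    #   [{"clusterID": "<fsid>", "monitors": ["<mon-ip>:6789", ...]}]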

    filter_namespaces = [] if args.namespaces == "" else args.namespaces.split(",")

    for orig_namespace, metas_group in metas_grouped.items():
        if filter_namespaces and orig_namespace not in filter_namespaces:
            continue  # skip filtered-out namespaces

        restore_namespace = orig_namespace

        # each mapping has the form "<orig-namespace>:<restore-namespace>"
        for namespace_mapping in args.namespace_mapping:
            if namespace_mapping.split(":")[0] == orig_namespace:
                restore_namespace = namespace_mapping.split(":")[1]
                logger.info(f"namespace mapping matched {namespace_mapping}")

        logger.info(f"trying to restore volumes of {orig_namespace} into {restore_namespace}")
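
        # (illustrative, assumed CLI shapes - the arg parser lives elsewhere:)
        #   --namespaces db,web --namespace-mapping db:db-restore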
        auto_scale_replicas = {}
        if args.auto_scale:
            # auto downscale deployments and statefulsets of the namespace
            deployments = apps_v1.list_namespaced_deployment(restore_namespace)
            for d in deployments.items:
                name = d.metadata.name
                auto_scale_replicas[f"dp-{name}"] = d.spec.replicas  # save original replicas for the upscale later
                logger.info(f"Scaling Deployment '{name}' to 0 replicas...")
                apps_v1.patch_namespaced_deployment_scale(
                    name=name,
                    namespace=restore_namespace,
                    body={"spec": {"replicas": 0}}
                )

            statefulsets = apps_v1.list_namespaced_stateful_set(restore_namespace)
            for s in statefulsets.items:
                name = s.metadata.name
                auto_scale_replicas[f"ss-{name}"] = s.spec.replicas
                logger.info(f"Scaling StatefulSet '{name}' to 0 replicas...")
                apps_v1.patch_namespaced_stateful_set_scale(
                    name=name,
                    namespace=restore_namespace,
                    body={"spec": {"replicas": 0}}
                )

            # wait for termination (note: pods being deleted still report phase
            # "Running" or "Pending"; "Terminating" is not an actual pod phase)
            while True:
                pods = core_v1.list_namespaced_pod(restore_namespace)
                remaining = [
                    pod.metadata.name
                    for pod in pods.items
                    if pod.status.phase in ["Running", "Pending", "Terminating"]
                ]
                if not remaining:
                    logger.info("All pods have terminated.")
                    break
                logger.info(f"Still active pods: {remaining}")
                time.sleep(5)


        # if the namespace still has pods, raise and tell the user to scale them down
        pods = core_v1.list_namespaced_pod(namespace=restore_namespace)

        existing_pvcs = set(pvc.metadata.name for pvc in core_v1.list_namespaced_persistent_volume_claim(restore_namespace).items)
        logger.debug(f"existing pvcs {existing_pvcs}")

        # fail if any pods other than completed ones exist
        active_pods = [pod.metadata.name for pod in pods.items if pod.status.phase != "Succeeded"]
        if active_pods:
            raise Exception(f"found pods in {restore_namespace} - {active_pods} - scale down all and force delete!")

        # process secret overwrites
        if args.secret_pattern:

            namespace_secrets = {secret["metadata"]["name"]: secret for secret in namespace_secret_dict[orig_namespace]}

            # each pattern arg has the form "<restore-namespace>/<glob>"
            for secret_pattern in args.secret_pattern:
                if secret_pattern.split("/")[0] == restore_namespace:
                    # arg that is meant for this namespace restore
                    pattern = secret_pattern.split("/")[1]

                    for secret in namespace_secrets:
                        if fnmatch.fnmatch(secret, pattern):
                            logger.info(f"overwrite pattern matched {pattern}, trying to patch {secret}")
                            try:
                                core_v1.patch_namespaced_secret(name=secret, namespace=restore_namespace, body={"data": namespace_secrets[secret]["data"]})
                            except ApiException as e:
                                # if it doesn't exist we simply create it
                                if e.status == 404:
                                    core_v1.create_namespaced_secret(
                                        namespace=restore_namespace,
                                        body={"metadata": {"name": secret}, "data": namespace_secrets[secret]["data"]}
                                    )
                                    logger.info(f"secret {secret} did not exist, created it instead!")
                                else:
                                    raise

        if args.auto_delete:
            pvcs = core_v1.list_namespaced_persistent_volume_claim(restore_namespace)
            for pvc in pvcs.items:
                name = pvc.metadata.name
                logger.info(f"Deleting PVC: {name}")
                core_v1.delete_namespaced_persistent_volume_claim(
                    name=name,
                    namespace=restore_namespace,
                    body=client.V1DeleteOptions()
                )

            while True:
                leftover = core_v1.list_namespaced_persistent_volume_claim(restore_namespace).items
                if not leftover:
                    logger.info("All PVCs have been deleted.")
                    break
                logger.info(f"Still waiting on: {[p.metadata.name for p in leftover]}")
                time.sleep(5)

            # there are no more existing pvcs
            existing_pvcs = set()


        # extract raw rbd images, import and recreate pvc if necessary
        for meta in metas_group:
            logger.debug(f"restoring {meta}")

            image_name = meta["image_name"]

            backup_type = meta["type"]

            pvc_dict = pickle.loads(base64.b64decode(meta["pvc_dict_b64"]))
            logger.debug(f"pvc_dict:\n{pvc_dict}")
            pv_dict = pickle.loads(base64.b64decode(meta["pv_dict_b64"]))
            logger.debug(f"pv_dict:\n{pv_dict}")

            # extract from the borg archive
            if args.backup_path:
                # use the provided absolute path (expected to end with a slash)
                full_borg_archive = f"{args.backup_path}borg-{backup_type}/{orig_namespace}::{image_name}_{args.timestamp}"
            else:
                full_borg_archive = f"{os.getcwd()}/borg-{backup_type}/{orig_namespace}::{image_name}_{args.timestamp}"

            # import the image into ceph
            # move it to a new pool if a mapping is defined ("<old-pool>:<new-pool>/<new-storage-class>")
            pool = meta["pool"]
            storage_class = pvc_dict["spec"]["storage_class_name"]

            if args.pool_sc_mapping:
                for pool_mapping in args.pool_sc_mapping:
                    old_pool = pool_mapping.split(":")[0]
                    new_pool_sc = pool_mapping.split(":")[1]
                    if pool == old_pool:
                        pool = new_pool_sc.split("/")[0]
                        storage_class = new_pool_sc.split("/")[1]
                        logger.debug(f"new mapping specified old pool {old_pool}, new pool {pool}, new sc {storage_class}")
                        break

            new_csi_image_name = f"csi-vol-{uuid.uuid4()}"

            logger.info(f"extracting borg archive {full_borg_archive} into rbd import {pool}/{new_csi_image_name}")

            with subprocess.Popen(["borg", "extract", "--sparse", "--stdout", full_borg_archive], stdout=subprocess.PIPE) as proc:
                subprocess.run(["rbd", "import", "-", f"{pool}/{new_csi_image_name}"], check=True, stdin=proc.stdout)
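
            # The two calls above are equivalent to this shell pipeline
            # (paths illustrative):
            #   borg extract --sparse --stdout /backups/borg-k8s/db::csi-vol-abc_2024-01-01 \
            #       | rbd import - rbd.k8s/csi-vol-<uuid>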

            # restore from the pickled pvc dicts
            new_pv_name = f"pvc-{uuid.uuid4()}"

            logger.debug(f"restoring pv with new pv name {new_pv_name} and csi image name {new_csi_image_name}")

            # create the new pvc based on the old one - remove the dynamic fields of the old:
            if pvc_dict['metadata']['name'] in existing_pvcs:
                pvc_name = pvc_dict['metadata']['name']
                pvc_dict['metadata']['name'] = f"test-restore-{pvc_name}"
                logger.info(f"pvc {pvc_name} exists, creating it with test-restore- prefix")

            # clean the old pvc object so it can be submitted freshly
            pvc_dict['metadata']['annotations'].pop('pv.kubernetes.io/bind-completed', None)
            pvc_dict['metadata']['annotations'].pop('pv.kubernetes.io/bound-by-controller', None)
            pvc_dict['metadata'].pop('finalizers', None)
            pvc_dict['metadata'].pop('managed_fields', None)
            pvc_dict['metadata'].pop('resource_version', None)
            pvc_dict['metadata'].pop('uid', None)
            pvc_dict['metadata'].pop('creation_timestamp', None)
            pvc_dict.pop('status', None)
            pvc_dict.pop('kind', None)
            pvc_dict.pop('api_version', None)

            # set new values
            pvc_dict['spec']['storage_class_name'] = storage_class
            pvc_dict['metadata']['namespace'] = restore_namespace

            # we can give it a customized pv name so we can recognize migrated ones - it will still behave like a normally created pv
            pvc_dict['spec']['volume_name'] = new_pv_name

            # creation call
            logger.debug(f"creating new pvc:\n{pformat(pvc_dict)}")
            core_v1.create_namespaced_persistent_volume_claim(namespace=restore_namespace, body=client.V1PersistentVolumeClaim(**convert_keys_to_camel_case(pvc_dict)))

            # clean up the old pv as well for recreation
            pv_dict.pop('api_version', None)
            pv_dict.pop('kind', None)
            pv_dict['metadata'].pop('creation_timestamp', None)
            pv_dict['metadata'].pop('finalizers', None)
            pv_dict['metadata'].pop('managed_fields', None)
            pv_dict['metadata'].pop('resource_version', None)
            pv_dict['metadata']['annotations'].pop('volume.kubernetes.io/provisioner-deletion-secret-name', None)
            pv_dict['metadata']['annotations'].pop('volume.kubernetes.io/provisioner-deletion-secret-namespace', None)
            pv_dict.pop('status', None)
            pv_dict['spec'].pop('claim_ref', None)
            pv_dict['spec'].pop('volume_attributes_class_name', None)
            pv_dict['spec'].pop('scale_io', None)
            pv_dict['spec']['csi'].pop('volume_handle', None)
            pv_dict['spec']['csi']['volume_attributes'].pop('imageName', None)
            pv_dict['spec']['csi']['volume_attributes'].pop('journalPool', None)
            pv_dict['spec']['csi']['volume_attributes'].pop('pool', None)

            # set new values

            # get the storage class and set the csi secrets from it
            ceph_storage_class = ceph_storage_classes[storage_class]
            pv_dict['metadata']['annotations']['volume.kubernetes.io/provisioner-deletion-secret-name'] = ceph_storage_class.parameters['csi.storage.k8s.io/provisioner-secret-name']
            pv_dict['metadata']['annotations']['volume.kubernetes.io/provisioner-deletion-secret-namespace'] = ceph_storage_class.parameters['csi.storage.k8s.io/provisioner-secret-namespace']

            pv_dict['spec']['csi']['node_stage_secret_ref']['name'] = ceph_storage_class.parameters['csi.storage.k8s.io/node-stage-secret-name']
            pv_dict['spec']['csi']['node_stage_secret_ref']['namespace'] = ceph_storage_class.parameters['csi.storage.k8s.io/node-stage-secret-namespace']

            pv_dict['spec']['csi']['controller_expand_secret_ref']['name'] = ceph_storage_class.parameters['csi.storage.k8s.io/controller-expand-secret-name']
            pv_dict['spec']['csi']['controller_expand_secret_ref']['namespace'] = ceph_storage_class.parameters['csi.storage.k8s.io/controller-expand-secret-namespace']

            pv_dict['spec']['csi']['volume_attributes']['clusterID'] = ceph_cluster_id

            # reconstruct a volume handle that the ceph csi provisioner understands
            pool_id = format(pool_name_id[pool], '016x')
            trimmed_new_csi_image_name = new_csi_image_name.removeprefix('csi-vol-')
            pv_dict['spec']['csi']['volume_handle'] = f"0001-0024-{ceph_cluster_id}-{pool_id}-{trimmed_new_csi_image_name}"
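            # e.g. (illustrative): 0001-0024-<cluster fsid>-0000000000000002-<image uuid>,
            # mirroring the handle format the rbd csi provisioner generates itself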

            pv_dict['spec']['csi']['volume_attributes']['imageName'] = new_csi_image_name
            pv_dict['spec']['csi']['volume_attributes']['journalPool'] = pool
            pv_dict['spec']['csi']['volume_attributes']['pool'] = pool

            pv_dict['spec']['storage_class_name'] = storage_class

            pv_dict['metadata']['name'] = new_pv_name

            # creation call
            logger.debug(f"creating new pv:\n{pformat(pv_dict)}")
            core_v1.create_persistent_volume(body=client.V1PersistentVolume(**convert_keys_to_camel_case(pv_dict)))

        # scale back up again
        if args.auto_scale:
            # scale the deployments and statefulsets of the namespace back up to their saved replica counts
            deployments = apps_v1.list_namespaced_deployment(restore_namespace)
            for d in deployments.items:
                name = d.metadata.name
                logger.info(f"Scaling Deployment '{name}' back up...")
                apps_v1.patch_namespaced_deployment_scale(
                    name=name,
                    namespace=restore_namespace,
                    body={"spec": {"replicas": auto_scale_replicas[f"dp-{name}"]}}
                )

            statefulsets = apps_v1.list_namespaced_stateful_set(restore_namespace)
            for s in statefulsets.items:
                name = s.metadata.name
                logger.info(f"Scaling StatefulSet '{name}' back up...")
                apps_v1.patch_namespaced_stateful_set_scale(
                    name=name,
                    namespace=restore_namespace,
                    body={"spec": {"replicas": auto_scale_replicas[f"ss-{name}"]}}
                )

        logger.info(f"restore of namespace {orig_namespace} into {restore_namespace} complete, you can now scale your deployments up again")


def get_image_metas(args, timestamp_filter=None):
    # backup_path is expected to end with a slash
    image_meta_db = TinyDB(f"{args.backup_path}image-meta-db.json")

    archives = []

    # iterate over all k8s namespaced borg repos
    k8s_base_path = f"{args.backup_path}/borg-k8s"
    namespace_repos = [name for name in os.listdir(k8s_base_path) if os.path.isdir(os.path.join(k8s_base_path, name))]

    for repo in namespace_repos:
        list_result = subprocess.run(["borg", "list", f"{args.backup_path}/borg-k8s/{repo}", "--json"], capture_output=True)

        if list_result.returncode != 0:
            raise Exception(f"Borg list failed for repo {repo}: {list_result.stderr.decode()}")

        archives.extend(json.loads(list_result.stdout)["archives"])

    # archive names have the form "<image_name>_<timestamp>"
    timestamp_archives = {}
    invalid_timestamps = set()
    for archive in archives:
        image, timestamp = archive["archive"].split("_", 1)

        if timestamp_filter is not None and timestamp_filter != timestamp:
            continue  # skip filtered

        if timestamp in invalid_timestamps:
            continue  # a meta was already missing for this timestamp

        if timestamp not in timestamp_archives:
            timestamp_archives[timestamp] = []

        Meta = Query()
        image_meta = image_meta_db.get((Meta.image_name == image) & (Meta.timestamp == timestamp))

        if image_meta is None:
            # without meta entries the timestamp cannot be restored completely, so drop it entirely
            logger.error(f"no meta found for timestamp {timestamp}, image_name {image}, archive {archive}")
            del timestamp_archives[timestamp]
            invalid_timestamps.add(timestamp)
            continue

        timestamp_archives[timestamp].append(image_meta)

    return timestamp_archives
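

# Minimal usage sketch (illustrative; `args` normally comes from the CLI parser):
#
#   class Args: backup_path = "/backups/"
#   metas_by_ts = get_image_metas(Args())
#   latest = sorted(metas_by_ts)[-1]
#   grouped = group_image_metas(metas_by_ts[latest], ["k8s"], "namespace")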


def copy_backup_generic():
    # copy all regular, non-hidden files from /opt/bdd into the backup base dir
    source_dir = '/opt/bdd'
    for file in os.listdir(source_dir):
        if not file.startswith("."):
            full_source_path = os.path.join(source_dir, file)
            full_dest_path = os.path.join(BACKUP_BASE_DIR, file)

            if os.path.isfile(full_source_path):
                shutil.copy2(full_source_path, full_dest_path)