gooddata-pipelines 1.49.1.dev1__py3-none-any.whl → 1.50.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of gooddata-pipelines might be problematic.
- gooddata_pipelines/__init__.py +7 -1
- gooddata_pipelines/api/gooddata_api.py +0 -54
- gooddata_pipelines/backup_and_restore/backup_manager.py +50 -44
- gooddata_pipelines/backup_and_restore/constants.py +2 -1
- gooddata_pipelines/backup_and_restore/models/storage.py +40 -2
- gooddata_pipelines/backup_and_restore/storage/s3_storage.py +22 -11
- gooddata_pipelines/provisioning/entities/users/models/permissions.py +23 -79
- gooddata_pipelines/provisioning/entities/users/models/user_groups.py +23 -50
- gooddata_pipelines/provisioning/entities/users/models/users.py +9 -49
- gooddata_pipelines/provisioning/entities/users/permissions.py +14 -6
- gooddata_pipelines/provisioning/entities/users/user_groups.py +7 -1
- gooddata_pipelines/provisioning/entities/users/users.py +3 -0
- gooddata_pipelines/provisioning/entities/workspaces/models.py +16 -15
- gooddata_pipelines/provisioning/entities/workspaces/workspace.py +52 -5
- gooddata_pipelines/provisioning/entities/workspaces/workspace_data_parser.py +9 -6
- gooddata_pipelines/provisioning/provisioning.py +24 -6
- gooddata_pipelines/provisioning/utils/context_objects.py +6 -6
- gooddata_pipelines/provisioning/utils/utils.py +3 -15
- {gooddata_pipelines-1.49.1.dev1.dist-info → gooddata_pipelines-1.50.0.dist-info}/METADATA +2 -2
- {gooddata_pipelines-1.49.1.dev1.dist-info → gooddata_pipelines-1.50.0.dist-info}/RECORD +22 -22
- {gooddata_pipelines-1.49.1.dev1.dist-info → gooddata_pipelines-1.50.0.dist-info}/WHEEL +0 -0
- {gooddata_pipelines-1.49.1.dev1.dist-info → gooddata_pipelines-1.50.0.dist-info}/licenses/LICENSE.txt +0 -0
gooddata_pipelines/__init__.py
CHANGED
@@ -19,6 +19,7 @@ from .provisioning.entities.user_data_filters.user_data_filters import (
     UserDataFilterProvisioner,
 )
 from .provisioning.entities.users.models.permissions import (
+    EntityType,
     PermissionFullLoad,
     PermissionIncrementalLoad,
 )
@@ -33,7 +34,10 @@ from .provisioning.entities.users.models.users import (
 from .provisioning.entities.users.permissions import PermissionProvisioner
 from .provisioning.entities.users.user_groups import UserGroupProvisioner
 from .provisioning.entities.users.users import UserProvisioner
-from .provisioning.entities.workspaces.models import
+from .provisioning.entities.workspaces.models import (
+    WorkspaceFullLoad,
+    WorkspaceIncrementalLoad,
+)
 from .provisioning.entities.workspaces.workspace import WorkspaceProvisioner
 
 __all__ = [
@@ -52,8 +56,10 @@ __all__ = [
     "UserGroupFullLoad",
     "UserProvisioner",
     "UserGroupProvisioner",
+    "WorkspaceIncrementalLoad",
     "PermissionProvisioner",
     "UserDataFilterProvisioner",
     "UserDataFilterFullLoad",
+    "EntityType",
     "__version__",
 ]
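
For orientation, a minimal import sketch of what 1.50.0 exposes at the package root, based on the hunks above (not part of the diff itself):

# Names taken from the updated imports and __all__ in gooddata_pipelines/__init__.py
from gooddata_pipelines import (
    EntityType,                # newly re-exported in 1.50.0
    WorkspaceFullLoad,
    WorkspaceIncrementalLoad,  # newly listed in __all__
    WorkspaceProvisioner,
)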

gooddata_pipelines/api/gooddata_api.py
CHANGED

@@ -7,9 +7,6 @@ from typing import Any
 
 import requests
 
-# TODO: Limit the use of "typing.Any". Improve readability by using either models
-# or typed dicts.
-
 TIMEOUT = 60
 REQUEST_PAGE_SIZE = 250
 API_VERSION = "v1"
@@ -55,42 +52,6 @@ class ApiMethods:
         """
         return f"{self.base_url}{endpoint}"
 
-    def get_custom_application_setting(
-        self, workspace_id: str, setting_id: str
-    ) -> requests.Response:
-        """Gets a custom application setting.
-
-        Args:
-            workspace_id (str): The ID of the workspace.
-            setting_id (str): The ID of the custom application setting.
-        Returns:
-            requests.Response: The response from the server containing the
-                custom application setting.
-        """
-        url = f"/entities/workspaces/{workspace_id}/customApplicationSettings/{setting_id}"
-        return self._get(url)
-
-    def put_custom_application_setting(
-        self, workspace_id: str, setting_id: str, data: dict[str, Any]
-    ) -> requests.Response:
-        url = f"/entities/workspaces/{workspace_id}/customApplicationSettings/{setting_id}"
-        return self._put(url, data, self.headers)
-
-    def post_custom_application_setting(
-        self, workspace_id: str, data: dict[str, Any]
-    ) -> requests.Response:
-        """Creates a custom application setting for a given workspace.
-
-        Args:
-            workspace_id (str): The ID of the workspace.
-            data (dict[str, Any]): The data for the custom application setting.
-        Returns:
-            requests.Response: The response from the server containing the
-                created custom application setting.
-        """
-        url = f"/entities/workspaces/{workspace_id}/customApplicationSettings/"
-        return self._post(url, data, self.headers)
-
     def get_all_workspace_data_filters(
         self, workspace_id: str
     ) -> requests.Response:
@@ -201,21 +162,6 @@ class ApiMethods:
             endpoint,
         )
 
-    def post_workspace_data_filter(
-        self, workspace_id: str, data: dict[str, Any]
-    ) -> requests.Response:
-        """Creates a workspace data filter for a given workspace.
-
-        Args:
-            workspace_id (str): The ID of the workspace.
-            data (dict[str, Any]): The data for the workspace data filter.
-        Returns:
-            requests.Response: The response from the server containing the
-                created workspace data filter.
-        """
-        endpoint = f"/entities/workspaces/{workspace_id}/workspaceDataFilters"
-        return self._post(endpoint, data, self.headers)
-
     def get_user_data_filters(self, workspace_id: str) -> requests.Response:
         """Gets the user data filters for a given workspace."""
         endpoint = f"/layout/workspaces/{workspace_id}/userDataFilters"

gooddata_pipelines/backup_and_restore/backup_manager.py
CHANGED

@@ -55,7 +55,7 @@ class BackupManager:
 
         self.config = config
 
-        self.storage = self.
+        self.storage = self._get_storage(self.config)
         self.org_id = self._api.get_organization_id()
 
         self.loader = BackupInputProcessor(self._api, self.config.api_page_size)
@@ -67,7 +67,7 @@ class BackupManager:
         host: str,
         token: str,
     ) -> "BackupManager":
-        """Creates a backup worker instance using provided host and token."""
+        """Creates a backup worker instance using the provided host and token."""
         return cls(host=host, token=token, config=config)
 
     @classmethod
@@ -81,7 +81,8 @@ class BackupManager:
         content = profile_content(profile, profiles_path)
         return cls(**content, config=config)
 
-
+    @staticmethod
+    def _get_storage(conf: BackupRestoreConfig) -> BackupStorage:
         """Returns the storage class based on the storage type."""
         if conf.storage_type == StorageType.S3:
             return S3Storage(conf)
@@ -100,7 +101,7 @@ class BackupManager:
         else:
             raise RuntimeError(f"{response.status_code}: {response.text}")
 
-    def
+    def _store_user_data_filters(
         self,
         user_data_filters: dict,
         export_path: Path,
@@ -128,20 +129,20 @@ class BackupManager:
                 "user_data_filters",
                 filter["id"] + ".yaml",
             )
-            self.
+            self._write_to_yaml(udf_file_path, filter)
 
     @staticmethod
-    def
+    def _move_folder(source: Path, destination: Path) -> None:
         """Moves the source folder to the destination."""
         shutil.move(source, destination)
 
     @staticmethod
-    def
+    def _write_to_yaml(path: str, source: Any) -> None:
         """Writes the source to a YAML file."""
         with open(path, "w") as outfile:
             yaml.dump(source, outfile)
 
-    def
+    def _get_automations_from_api(self, workspace_id: str) -> Any:
         """Returns automations for the workspace as JSON."""
         response: requests.Response = self._api.get_automations(workspace_id)
         if response.ok:
@@ -152,10 +153,10 @@ class BackupManager:
                 + f"{response.status_code}: {response.text}"
             )
 
-    def
+    def _store_automations(self, export_path: Path, workspace_id: str) -> None:
         """Stores the automations in the specified export path."""
         # Get the automations from the API
-        automations: Any = self.
+        automations: Any = self._get_automations_from_api(workspace_id)
 
         automations_folder_path: Path = Path(
             export_path,
@@ -184,8 +185,8 @@ class BackupManager:
         # Get the filter views YAML files from the API
         self._api.store_declarative_filter_views(workspace_id, export_path)
 
-        # Move filter views to the subfolder containing analytics model
-        self.
+        # Move filter views to the subfolder containing the analytics model
+        self._move_folder(
             Path(export_path, "gooddata_layouts", self.org_id, "filter_views"),
             Path(
                 export_path,
@@ -197,7 +198,7 @@ class BackupManager:
             ),
         )
 
-    def
+    def _get_workspace_export(
         self,
         local_target_path: str,
         workspaces_to_export: list[str],
@@ -232,9 +233,9 @@ class BackupManager:
         # be more transparent/readable and possibly safer for threading
         self._api.store_declarative_workspace(workspace_id, export_path)
         self.store_declarative_filter_views(export_path, workspace_id)
-        self.
+        self._store_automations(export_path, workspace_id)
 
-        self.
+        self._store_user_data_filters(
             user_data_filters, export_path, workspace_id
         )
         self.logger.info(f"Stored export for {workspace_id}")
@@ -250,7 +251,7 @@ class BackupManager:
             + "is correct and that the workspaces exist."
         )
 
-    def
+    def _archive_gooddata_layouts_to_zip(self, folder: str) -> None:
         """Archives the gooddata_layouts directory to a zip file."""
         try:
             target_subdir = ""
@@ -271,11 +272,12 @@ class BackupManager:
             self.logger.error(f"Error archiving {folder} to zip: {e}")
             raise
 
-
-
+    @staticmethod
+    def _split_to_batches(
+        workspaces_to_export: list[str], batch_size: int
     ) -> list[BackupBatch]:
-        """Splits the list of workspaces
-        The batch is
+        """Splits the list of workspaces into batches of the specified size.
+        The batch is represented as a list of workspace IDs.
         Returns a list of batches (i.e. list of lists of IDs)
         """
         list_of_batches = []
@@ -286,7 +288,7 @@ class BackupManager:
 
         return list_of_batches
 
-    def
+    def _process_batch(
         self,
         batch: BackupBatch,
         stop_event: threading.Event,
@@ -298,14 +300,14 @@ class BackupManager:
         The base wait time is defined by BackupSettings.RETRY_DELAY.
         """
         if stop_event.is_set():
-            # If the stop_event flag is set, return. This will terminate the thread
+            # If the stop_event flag is set, return. This will terminate the thread
            return
 
        try:
            with tempfile.TemporaryDirectory() as tmpdir:
-                self.
+                self._get_workspace_export(tmpdir, batch.list_of_ids)
 
-                self.
+                self._archive_gooddata_layouts_to_zip(
                    str(Path(tmpdir, self.org_id))
                )
 
@@ -316,7 +318,7 @@ class BackupManager:
                 return
 
             elif retry_count < BackupSettings.MAX_RETRIES:
-                # Retry with exponential backoff until MAX_RETRIES
+                # Retry with exponential backoff until MAX_RETRIES
                 next_retry = retry_count + 1
                 wait_time = BackupSettings.RETRY_DELAY**next_retry
                 self.logger.info(
@@ -326,13 +328,13 @@ class BackupManager:
                 )
 
                 time.sleep(wait_time)
-                self.
+                self._process_batch(batch, stop_event, next_retry)
             else:
-                # If the batch fails after MAX_RETRIES, raise the error
+                # If the batch fails after MAX_RETRIES, raise the error
                 self.logger.error(f"Batch failed: {e.__class__.__name__}: {e}")
                 raise
 
-    def
+    def _process_batches_in_parallel(
         self,
         batches: list[BackupBatch],
     ) -> None:
@@ -345,14 +347,14 @@ class BackupManager:
         stop_event = threading.Event()
 
         with ThreadPoolExecutor(
-            max_workers=
+            max_workers=self.config.max_workers
         ) as executor:
             # Set the futures tasks.
             futures = []
             for batch in batches:
                 futures.append(
                     executor.submit(
-                        self.
+                        self._process_batch,
                         batch,
                         stop_event,
                     )
@@ -363,10 +365,10 @@ class BackupManager:
             try:
                 future.result()
             except Exception:
-                # On failure, set the flag to True - signal running processes to stop
+                # On failure, set the flag to True - signal running processes to stop
                 stop_event.set()
 
-                # Cancel unstarted threads
+                # Cancel unstarted threads
                 for f in futures:
                     if not f.done():
                         f.cancel()
@@ -374,7 +376,9 @@ class BackupManager:
             raise
 
     def backup_workspaces(
-        self,
+        self,
+        path_to_csv: str | None = None,
+        workspace_ids: list[str] | None = None,
     ) -> None:
         """Runs the backup process for a list of workspace IDs.
 
@@ -383,27 +387,29 @@ class BackupManager:
         workspace in storage specified in the configuration.
 
         Args:
-            path_to_csv (str): Path to a CSV file containing a list of workspace IDs
+            path_to_csv (str): Path to a CSV file containing a list of workspace IDs
             workspace_ids (list[str]): List of workspace IDs
         """
-        self.
+        self._backup(InputType.LIST_OF_WORKSPACES, path_to_csv, workspace_ids)
 
     def backup_hierarchies(
-        self,
+        self,
+        path_to_csv: str | None = None,
+        workspace_ids: list[str] | None = None,
     ) -> None:
         """Runs the backup process for a list of hierarchies.
 
         Will take the list of workspace IDs or read the list of workspace IDs
-        from a CSV file and create backup for each those workspaces' hierarchies
+        from a CSV file and create backup for each of those workspaces' hierarchies
         in storage specified in the configuration.
         Workspace hierarchy means the workspace itself and all its direct and
         indirect children.
 
         Args:
-            path_to_csv (str): Path to a CSV file containing a list of workspace IDs
+            path_to_csv (str): Path to a CSV file containing a list of workspace IDs
             workspace_ids (list[str]): List of workspace IDs
         """
-        self.
+        self._backup(InputType.HIERARCHY, path_to_csv, workspace_ids)
 
     def backup_entire_organization(self) -> None:
         """Runs the backup process for the entire organization.
@@ -411,22 +417,22 @@ class BackupManager:
         Will create backup for all workspaces in the organization in storage
         specified in the configuration.
         """
-        self.
+        self._backup(InputType.ORGANIZATION)
 
-    def
+    def _backup(
         self,
         input_type: InputType,
         path_to_csv: str | None = None,
         workspace_ids: list[str] | None = None,
     ) -> None:
-        """Runs the backup process with selected input type."""
+        """Runs the backup process with the selected input type."""
         try:
             workspaces_to_export: list[str] = self.loader.get_ids_to_backup(
                 input_type,
                 path_to_csv,
                 workspace_ids,
             )
-            batches = self.
+            batches = self._split_to_batches(
                 workspaces_to_export, self.config.batch_size
             )
 
@@ -434,7 +440,7 @@ class BackupManager:
                 f"Exporting {len(workspaces_to_export)} workspaces in {len(batches)} batches."
             )
 
-            self.
+            self._process_batches_in_parallel(batches)
 
             self.logger.info("Backup completed")
         except Exception as e:
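
The renamed private helpers (_get_storage, _store_automations, _process_batch, and so on) do not change the public surface, but backup_workspaces and backup_hierarchies now take their input as explicit keyword arguments. A usage sketch, assuming the module paths from the file list are importable as-is and that BackupManager can be constructed with host, token, and config (as the classmethod shown above does); workspace IDs and file names are illustrative:

# Only the method signatures come from the diff; everything else is an assumption.
from gooddata_pipelines.backup_and_restore.backup_manager import BackupManager
from gooddata_pipelines.backup_and_restore.models.storage import BackupRestoreConfig

config = BackupRestoreConfig.from_yaml("backup_config.yaml")
manager = BackupManager(host="https://example.gooddata.com", token="<api-token>", config=config)

manager.backup_workspaces(workspace_ids=["ws_sales", "ws_finance"])  # explicit list of IDs
manager.backup_hierarchies(path_to_csv="hierarchy_roots.csv")        # or a CSV of IDs
manager.backup_entire_organization()                                 # no arguments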

gooddata_pipelines/backup_and_restore/constants.py
CHANGED

@@ -1,3 +1,4 @@
+# (C) 2025 GoodData Corporation
 import datetime
 from dataclasses import dataclass
 
@@ -22,7 +23,7 @@ class DirNames:
 
 @dataclass(frozen=True)
 class ConcurrencyDefaults:
-    MAX_WORKERS =
+    MAX_WORKERS = 1
     DEFAULT_BATCH_SIZE = 100
 
 

gooddata_pipelines/backup_and_restore/models/storage.py
CHANGED

@@ -21,10 +21,40 @@ class S3StorageConfig(BaseModel):
 
     backup_path: str
     bucket: str
-    profile: str =
+    profile: Optional[str] = None
     aws_access_key_id: Optional[str] = None
     aws_secret_access_key: Optional[str] = None
-    aws_default_region: Optional[str] =
+    aws_default_region: Optional[str] = "us-east-1"
+
+    @classmethod
+    def from_iam_role(cls, backup_path: str, bucket: str) -> "S3StorageConfig":
+        """Use default IAM role or environment credentials."""
+        return cls(backup_path=backup_path, bucket=bucket)
+
+    @classmethod
+    def from_aws_credentials(
+        cls,
+        backup_path: str,
+        bucket: str,
+        aws_access_key_id: str,
+        aws_secret_access_key: str,
+        aws_default_region: str,
+    ) -> "S3StorageConfig":
+        """Use explicit AWS access keys and region."""
+        return cls(
+            backup_path=backup_path,
+            bucket=bucket,
+            aws_access_key_id=aws_access_key_id,
+            aws_secret_access_key=aws_secret_access_key,
+            aws_default_region=aws_default_region,
+        )
+
+    @classmethod
+    def from_aws_profile(
+        cls, backup_path: str, bucket: str, profile: str
+    ) -> "S3StorageConfig":
+        """Use a named AWS CLI profile."""
+        return cls(backup_path=backup_path, bucket=bucket, profile=profile)
 
 
 class LocalStorageConfig(BaseModel):
@@ -53,6 +83,14 @@ class BackupRestoreConfig(BaseModel):
             description="Batch size must be greater than 0",
         ),
     ] = Field(default=BackupSettings.DEFAULT_BATCH_SIZE)
+    max_workers: Annotated[
+        int,
+        Field(
+            gt=0,
+            lt=3,
+            description="Max workers must be greater than 0 and less than 3",
+        ),
+    ] = Field(default=BackupSettings.MAX_WORKERS)
 
     @classmethod
     def from_yaml(cls, conf_path: str) -> "BackupRestoreConfig":
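
The new classmethods make the three supported credential styles explicit. A sketch, assuming the module path from the file list (bucket and path names are illustrative):

from gooddata_pipelines.backup_and_restore.models.storage import S3StorageConfig

# Named AWS CLI profile
storage = S3StorageConfig.from_aws_profile(
    backup_path="org-backups/", bucket="my-gooddata-backups", profile="gooddata"
)

# Default IAM role or environment credentials
storage = S3StorageConfig.from_iam_role(backup_path="org-backups/", bucket="my-gooddata-backups")

# Explicit access keys
storage = S3StorageConfig.from_aws_credentials(
    backup_path="org-backups/",
    bucket="my-gooddata-backups",
    aws_access_key_id="AKIA...",
    aws_secret_access_key="...",
    aws_default_region="us-east-1",
)

Note also the new max_workers field on BackupRestoreConfig: it defaults to BackupSettings.MAX_WORKERS and is validated to be 1 or 2 (gt=0, lt=3).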

gooddata_pipelines/backup_and_restore/storage/s3_storage.py
CHANGED

@@ -22,6 +22,7 @@ class S3Storage(BackupStorage):
 
         self._config = conf.storage
         self._session = self._create_boto_session(self._config)
+        self._client = self._session.client("s3")
         self._resource = self._session.resource("s3")
         self._bucket = self._resource.Bucket(self._config.bucket)  # type: ignore [missing library stubs]
         suffix = "/" if not self._config.backup_path.endswith("/") else ""
@@ -43,32 +44,40 @@ class S3Storage(BackupStorage):
             )
         except Exception:
             self.logger.warning(
-                "Failed to create boto3 session with supplied credentials.
+                "Failed to create boto3 session with supplied credentials."
+            )
+
+        if config.profile:
+            try:
+                return boto3.Session(profile_name=config.profile)
+            except Exception:
+                self.logger.warning(
+                    f"AWS profile [{config.profile}] not found."
             )
 
         try:
-            return boto3.Session(
+            return boto3.Session()
         except Exception:
-            self.logger.
-
+            self.logger.error(
+                "Failed to create boto3 session with default IAM role or environment credentials."
+            )
+            raise RuntimeError(
+                "Unable to create AWS session. Please check your AWS credentials, profile, or IAM role configuration."
             )
-
-        return boto3.Session()
 
     def _verify_connection(self) -> None:
         """
         Pings the S3 bucket to verify that the connection is working.
         """
         try:
-
-            self._resource.meta.client.head_bucket(Bucket=self._config.bucket)
+            self._client.head_bucket(Bucket=self._config.bucket)
         except Exception as e:
             raise RuntimeError(
                 f"Failed to connect to S3 bucket {self._config.bucket}: {e}"
             )
 
     def export(self, folder: str, org_id: str) -> None:
-        """Uploads the content of the folder to S3 as backup."""
+        """Uploads the content of the folder to S3 as a backup."""
         storage_path = f"{self._config.bucket}/{self._backup_path}"
         self.logger.info(f"Uploading {org_id} to {storage_path}")
         folder = f"{folder}/{org_id}"
@@ -77,10 +86,12 @@ class S3Storage(BackupStorage):
                 export_path = (
                     f"{self._backup_path}{org_id}/{full_path[len(folder) + 1 :]}/"
                 )
-                self.
+                self._client.put_object(Bucket=self._config.bucket, Key=export_path)
 
             for file in files:
                 full_path = os.path.join(subdir, file)
                 with open(full_path, "rb") as data:
                     export_path = f"{self._backup_path}{org_id}/{full_path[len(folder) + 1 :]}"
-                    self.
+                    self._client.put_object(
+                        Bucket=self._config.bucket, Key=export_path, Body=data
+                    )