gooddata_pipelines-1.47.1.dev1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of gooddata-pipelines might be problematic.

Files changed (54)
  1. gooddata_pipelines/__init__.py +59 -0
  2. gooddata_pipelines/_version.py +7 -0
  3. gooddata_pipelines/api/__init__.py +5 -0
  4. gooddata_pipelines/api/exceptions.py +41 -0
  5. gooddata_pipelines/api/gooddata_api.py +309 -0
  6. gooddata_pipelines/api/gooddata_api_wrapper.py +36 -0
  7. gooddata_pipelines/api/gooddata_sdk.py +374 -0
  8. gooddata_pipelines/api/utils.py +43 -0
  9. gooddata_pipelines/backup_and_restore/__init__.py +1 -0
  10. gooddata_pipelines/backup_and_restore/backup_input_processor.py +195 -0
  11. gooddata_pipelines/backup_and_restore/backup_manager.py +430 -0
  12. gooddata_pipelines/backup_and_restore/constants.py +42 -0
  13. gooddata_pipelines/backup_and_restore/csv_reader.py +41 -0
  14. gooddata_pipelines/backup_and_restore/models/__init__.py +1 -0
  15. gooddata_pipelines/backup_and_restore/models/input_type.py +11 -0
  16. gooddata_pipelines/backup_and_restore/models/storage.py +58 -0
  17. gooddata_pipelines/backup_and_restore/models/workspace_response.py +51 -0
  18. gooddata_pipelines/backup_and_restore/storage/__init__.py +1 -0
  19. gooddata_pipelines/backup_and_restore/storage/base_storage.py +18 -0
  20. gooddata_pipelines/backup_and_restore/storage/local_storage.py +37 -0
  21. gooddata_pipelines/backup_and_restore/storage/s3_storage.py +71 -0
  22. gooddata_pipelines/logger/__init__.py +8 -0
  23. gooddata_pipelines/logger/logger.py +115 -0
  24. gooddata_pipelines/provisioning/__init__.py +31 -0
  25. gooddata_pipelines/provisioning/assets/wdf_setting.json +14 -0
  26. gooddata_pipelines/provisioning/entities/__init__.py +1 -0
  27. gooddata_pipelines/provisioning/entities/user_data_filters/__init__.py +1 -0
  28. gooddata_pipelines/provisioning/entities/user_data_filters/models/__init__.py +1 -0
  29. gooddata_pipelines/provisioning/entities/user_data_filters/models/udf_models.py +32 -0
  30. gooddata_pipelines/provisioning/entities/user_data_filters/user_data_filters.py +221 -0
  31. gooddata_pipelines/provisioning/entities/users/__init__.py +1 -0
  32. gooddata_pipelines/provisioning/entities/users/models/__init__.py +1 -0
  33. gooddata_pipelines/provisioning/entities/users/models/permissions.py +242 -0
  34. gooddata_pipelines/provisioning/entities/users/models/user_groups.py +64 -0
  35. gooddata_pipelines/provisioning/entities/users/models/users.py +114 -0
  36. gooddata_pipelines/provisioning/entities/users/permissions.py +153 -0
  37. gooddata_pipelines/provisioning/entities/users/user_groups.py +212 -0
  38. gooddata_pipelines/provisioning/entities/users/users.py +179 -0
  39. gooddata_pipelines/provisioning/entities/workspaces/__init__.py +1 -0
  40. gooddata_pipelines/provisioning/entities/workspaces/models.py +78 -0
  41. gooddata_pipelines/provisioning/entities/workspaces/workspace.py +263 -0
  42. gooddata_pipelines/provisioning/entities/workspaces/workspace_data_filters.py +286 -0
  43. gooddata_pipelines/provisioning/entities/workspaces/workspace_data_parser.py +123 -0
  44. gooddata_pipelines/provisioning/entities/workspaces/workspace_data_validator.py +188 -0
  45. gooddata_pipelines/provisioning/provisioning.py +132 -0
  46. gooddata_pipelines/provisioning/utils/__init__.py +1 -0
  47. gooddata_pipelines/provisioning/utils/context_objects.py +32 -0
  48. gooddata_pipelines/provisioning/utils/exceptions.py +95 -0
  49. gooddata_pipelines/provisioning/utils/utils.py +80 -0
  50. gooddata_pipelines/py.typed +0 -0
  51. gooddata_pipelines-1.47.1.dev1.dist-info/METADATA +85 -0
  52. gooddata_pipelines-1.47.1.dev1.dist-info/RECORD +54 -0
  53. gooddata_pipelines-1.47.1.dev1.dist-info/WHEEL +4 -0
  54. gooddata_pipelines-1.47.1.dev1.dist-info/licenses/LICENSE.txt +1 -277
gooddata_pipelines/backup_and_restore/backup_manager.py
@@ -0,0 +1,430 @@
+ # (C) 2025 GoodData Corporation
+
+ import json
+ import os
+ import shutil
+ import tempfile
+ import threading
+ import time
+ import traceback
+ from concurrent.futures import ThreadPoolExecutor, as_completed
+ from dataclasses import dataclass
+ from pathlib import Path
+ from typing import Any, Type
+
+ import requests
+ import yaml
+ from gooddata_sdk.utils import PROFILES_FILE_PATH, profile_content
+
+ from gooddata_pipelines.api.gooddata_api_wrapper import GoodDataApi
+ from gooddata_pipelines.backup_and_restore.backup_input_processor import (
+     BackupInputProcessor,
+ )
+ from gooddata_pipelines.backup_and_restore.constants import (
+     BackupSettings,
+     DirNames,
+ )
+ from gooddata_pipelines.backup_and_restore.models.input_type import InputType
+ from gooddata_pipelines.backup_and_restore.models.storage import (
+     BackupRestoreConfig,
+     StorageType,
+ )
+ from gooddata_pipelines.backup_and_restore.storage.base_storage import (
+     BackupStorage,
+ )
+ from gooddata_pipelines.backup_and_restore.storage.local_storage import (
+     LocalStorage,
+ )
+ from gooddata_pipelines.backup_and_restore.storage.s3_storage import (
+     S3Storage,
+ )
+ from gooddata_pipelines.logger import LogObserver
+
+
+ @dataclass
+ class BackupBatch:
+     list_of_ids: list[str]
+
+
+ class BackupManager:
+     storage: BackupStorage
+
+     def __init__(self, host: str, token: str, config: BackupRestoreConfig):
+         self._api = GoodDataApi(host, token)
+         self.logger = LogObserver()
+
+         self.config = config
+
+         self.storage = self.get_storage(self.config)
+         self.org_id = self._api.get_organization_id()
+
+         self.loader = BackupInputProcessor(self._api, self.config.api_page_size)
+
+     @classmethod
+     def create(
+         cls: Type["BackupManager"],
+         config: BackupRestoreConfig,
+         host: str,
+         token: str,
+     ) -> "BackupManager":
+         """Creates a backup worker instance using provided host and token."""
+         return cls(host=host, token=token, config=config)
+
+     @classmethod
+     def create_from_profile(
+         cls: Type["BackupManager"],
+         config: BackupRestoreConfig,
+         profile: str = "default",
+         profiles_path: Path = PROFILES_FILE_PATH,
+     ) -> "BackupManager":
+         """Creates a backup worker instance using a GoodData profile file."""
+         content = profile_content(profile, profiles_path)
+         return cls(**content, config=config)
+
+     def get_storage(self, conf: BackupRestoreConfig) -> BackupStorage:
+         """Returns the storage class based on the storage type."""
+         if conf.storage_type == StorageType.S3:
+             return S3Storage(conf)
+         elif conf.storage_type == StorageType.LOCAL:
+             return LocalStorage(conf)
+         else:
+             raise RuntimeError(
+                 f'Unsupported storage type "{conf.storage_type.value}".'
+             )
+
+     def get_user_data_filters(self, ws_id: str) -> dict:
+         """Returns the user data filters for the specified workspace."""
+         response: requests.Response = self._api.get_user_data_filters(ws_id)
+         if response.ok:
+             return response.json()
+         else:
+             raise RuntimeError(f"{response.status_code}: {response.text}")
+
+     def store_user_data_filters(
+         self,
+         user_data_filters: dict,
+         export_path: Path,
+         ws_id: str,
+     ) -> None:
+         """Stores the user data filters in the specified export path."""
+         os.mkdir(
+             os.path.join(
+                 export_path,
+                 "gooddata_layouts",
+                 self.org_id,
+                 "workspaces",
+                 ws_id,
+                 "user_data_filters",
+             )
+         )
+
+         for filter in user_data_filters["userDataFilters"]:
+             udf_file_path = os.path.join(
+                 export_path,
+                 "gooddata_layouts",
+                 self.org_id,
+                 "workspaces",
+                 ws_id,
+                 "user_data_filters",
+                 filter["id"] + ".yaml",
+             )
+             self.write_to_yaml(udf_file_path, filter)
+
+     @staticmethod
+     def move_folder(source: Path, destination: Path) -> None:
+         """Moves the source folder to the destination."""
+         shutil.move(source, destination)
+
+     @staticmethod
+     def write_to_yaml(path: str, source: Any) -> None:
+         """Writes the source to a YAML file."""
+         with open(path, "w") as outfile:
+             yaml.dump(source, outfile)
+
+     def get_automations_from_api(self, workspace_id: str) -> Any:
+         """Returns automations for the workspace as JSON."""
+         response: requests.Response = self._api.get_automations(workspace_id)
+         if response.ok:
+             return response.json()
+         else:
+             raise RuntimeError(
+                 f"Failed to get automations for {workspace_id}. "
+                 + f"{response.status_code}: {response.text}"
+             )
+
+     def store_automations(self, export_path: Path, workspace_id: str) -> None:
+         """Stores the automations in the specified export path."""
+         # Get the automations from the API
+         automations: Any = self.get_automations_from_api(workspace_id)
+
+         automations_folder_path: Path = Path(
+             export_path,
+             "gooddata_layouts",
+             self.org_id,
+             "workspaces",
+             workspace_id,
+             "automations",
+         )
+
+         automations_file_path: Path = Path(
+             automations_folder_path, "automations.json"
+         )
+
+         os.mkdir(automations_folder_path)
+
+         # Store the automations in a JSON file
+         if len(automations["data"]) > 0:
+             with open(automations_file_path, "w") as f:
+                 json.dump(automations, f)
+
+     def store_declarative_filter_views(
+         self, export_path: Path, workspace_id: str
+     ) -> None:
+         """Stores the filter views in the specified export path."""
+         # Get the filter views YAML files from the API
+         self._api.store_declarative_filter_views(workspace_id, export_path)
+
+         # Move filter views to the subfolder containing analytics model
+         self.move_folder(
+             Path(export_path, "gooddata_layouts", self.org_id, "filter_views"),
+             Path(
+                 export_path,
+                 "gooddata_layouts",
+                 self.org_id,
+                 "workspaces",
+                 workspace_id,
+                 "filter_views",
+             ),
+         )
+
+     def get_workspace_export(
+         self,
+         local_target_path: str,
+         workspaces_to_export: list[str],
+     ) -> None:
+         """
+         Iterate over all workspaces in the workspaces_to_export list and store
+         their declarative_workspace and their respective user data filters.
+         """
+         exported = False
+         for workspace_id in workspaces_to_export:
+             export_path = Path(
+                 local_target_path,
+                 self.org_id,
+                 workspace_id,
+                 BackupSettings.TIMESTAMP_SDK_FOLDER,
+             )
+
+             try:
+                 user_data_filters = self.get_user_data_filters(workspace_id)
+             except Exception as e:
+                 self.logger.error(
+                     f"Skipping backup of {workspace_id} - check if workspace exists. "
+                     + f"{e.__class__.__name__}: {e}"
+                 )
+                 continue
+
+             try:
+                 # TODO: consider using the API to get JSON declarations in memory
+                 # or check if there is a way to get YAML structures directly from
+                 # the SDK. That way we could save and package all the declarations
+                 # directly instead of reorganizing the folder structures. That should
+                 # be more transparent/readable and possibly safer for threading
+                 self._api.store_declarative_workspace(workspace_id, export_path)
+                 self.store_declarative_filter_views(export_path, workspace_id)
+                 self.store_automations(export_path, workspace_id)
+
+                 self.store_user_data_filters(
+                     user_data_filters, export_path, workspace_id
+                 )
+                 self.logger.info(f"Stored export for {workspace_id}")
+                 exported = True
+             except Exception as e:
+                 self.logger.error(
+                     f"Skipping {workspace_id}. {e.__class__.__name__} encountered: {e}"
+                 )
+
+         if not exported:
+             raise RuntimeError(
+                 "None of the workspaces were exported. Check that the source file "
+                 + "is correct and that the workspaces exist."
+             )
+
+     def archive_gooddata_layouts_to_zip(self, folder: str) -> None:
+         """Archives the gooddata_layouts directory to a zip file."""
+         try:
+             target_subdir = ""
+             for subdir, dirs, files in os.walk(folder):
+                 if DirNames.LAYOUTS in dirs:
+                     target_subdir = os.path.join(subdir, dirs[0])
+                 if DirNames.LDM in dirs:
+                     inner_layouts_dir = subdir + "/gooddata_layouts"
+                     os.mkdir(inner_layouts_dir)
+                     for dir in dirs:
+                         shutil.move(
+                             os.path.join(subdir, dir),
+                             os.path.join(inner_layouts_dir),
+                         )
+                     shutil.make_archive(target_subdir, "zip", subdir)
+                     shutil.rmtree(target_subdir)
+         except Exception as e:
+             self.logger.error(f"Error archiving {folder} to zip: {e}")
+             raise
+
+     def split_to_batches(
+         self, workspaces_to_export: list[str], batch_size: int
+     ) -> list[BackupBatch]:
+         """Splits the list of workspaces into batches of the specified size.
+         Each batch is represented as a list of workspace IDs.
+         Returns a list of batches (i.e. a list of lists of IDs).
+         """
+         list_of_batches = []
+         while workspaces_to_export:
+             batch = BackupBatch(workspaces_to_export[:batch_size])
+             workspaces_to_export = workspaces_to_export[batch_size:]
+             list_of_batches.append(batch)
+
+         return list_of_batches
+
+     def process_batch(
+         self,
+         batch: BackupBatch,
+         stop_event: threading.Event,
+         retry_count: int = 0,
+     ) -> None:
+         """Processes a single batch of workspaces for backup.
+         If the batch processing fails, the function will wait
+         and retry with exponential backoff up to BackupSettings.MAX_RETRIES.
+         The base wait time is defined by BackupSettings.RETRY_DELAY.
+         """
+         if stop_event.is_set():
+             # If the stop_event flag is set, return. This will terminate the thread.
+             return
+
+         try:
+             with tempfile.TemporaryDirectory() as tmpdir:
+                 self.get_workspace_export(tmpdir, batch.list_of_ids)
+
+                 self.archive_gooddata_layouts_to_zip(
+                     str(Path(tmpdir, self.org_id))
+                 )
+
+                 self.storage.export(tmpdir, self.org_id)
+
+         except Exception as e:
+             if stop_event.is_set():
+                 return
+
+             elif retry_count < BackupSettings.MAX_RETRIES:
+                 # Retry with exponential backoff until MAX_RETRIES.
+                 next_retry = retry_count + 1
+                 wait_time = BackupSettings.RETRY_DELAY**next_retry
+                 self.logger.info(
+                     f"{e.__class__.__name__} encountered while processing a batch. "
+                     + f"Retrying {next_retry}/{BackupSettings.MAX_RETRIES} "
+                     + f"in {wait_time} seconds..."
+                 )
+
+                 time.sleep(wait_time)
+                 self.process_batch(batch, stop_event, next_retry)
+             else:
+                 # If the batch fails after MAX_RETRIES, raise the error.
+                 self.logger.error(f"Batch failed: {e.__class__.__name__}: {e}")
+                 raise
+
+     def process_batches_in_parallel(
+         self,
+         batches: list[BackupBatch],
+     ) -> None:
+         """
+         Processes batches in parallel using concurrent.futures. Will stop the processing
+         if any one of the batches fails.
+         """
+
+         # Create a threading flag to control the threads that have already been started
+         stop_event = threading.Event()
+
+         with ThreadPoolExecutor(
+             max_workers=BackupSettings.MAX_WORKERS
+         ) as executor:
+             # Set the futures tasks.
+             futures = []
+             for batch in batches:
+                 futures.append(
+                     executor.submit(
+                         self.process_batch,
+                         batch,
+                         stop_event,
+                     )
+                 )
+
+             # Process futures as they complete
+             for future in as_completed(futures):
+                 try:
+                     future.result()
+                 except Exception:
+                     # On failure, set the flag to True - signal running processes to stop.
+                     stop_event.set()
+
+                     # Cancel unstarted threads.
+                     for f in futures:
+                         if not f.done():
+                             f.cancel()
+
+                     raise
+
+     def backup_workspaces(self, path_to_csv: str) -> None:
+         """Runs the backup process for a list of workspace IDs.
+
+         Will read the list of workspace IDs from a CSV file and create a backup of
+         each workspace in the storage specified in the configuration.
+
+         Args:
+             path_to_csv (str): Path to a CSV file containing a list of workspace IDs.
+         """
+         self.backup(InputType.LIST_OF_WORKSPACES, path_to_csv)
+
+     def backup_hierarchies(self, path_to_csv: str) -> None:
+         """Runs the backup process for a list of hierarchies.
+
+         Will read the list of workspace IDs from a CSV file and create a backup of
+         each of those workspaces' hierarchies in the storage specified in the configuration.
+         Workspace hierarchy means the workspace itself and all its direct and
+         indirect children.
+
+         Args:
+             path_to_csv (str): Path to a CSV file containing a list of workspace IDs.
+         """
+         self.backup(InputType.HIERARCHY, path_to_csv)
+
+     def backup_entire_organization(self) -> None:
+         """Runs the backup process for the entire organization.
+
+         Will create a backup of all workspaces in the organization in the storage
+         specified in the configuration.
+         """
+         self.backup(InputType.ORGANIZATION)
+
+     def backup(
+         self, input_type: InputType, path_to_csv: str | None = None
+     ) -> None:
+         """Runs the backup process with selected input type."""
+         try:
+             workspaces_to_export: list[str] = self.loader.get_ids_to_backup(
+                 input_type, path_to_csv
+             )
+             batches = self.split_to_batches(
+                 workspaces_to_export, self.config.batch_size
+             )
+
+             self.logger.info(
+                 f"Exporting {len(workspaces_to_export)} workspaces in {len(batches)} batches."
+             )
+
+             self.process_batches_in_parallel(batches)
+
+             self.logger.info("Backup completed")
+         except Exception as e:
+             self.logger.error(f"Backup failed: {e.__class__.__name__}: {e}")
+             self.logger.error(traceback.format_exc())
+             raise
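
For orientation, a minimal usage sketch of the BackupManager API shown above (not part of the diff). The import paths follow the module layout from the file list; whether these names are also re-exported from the top-level gooddata_pipelines package is not confirmed here, and the host, token, and file names are placeholders.

from gooddata_pipelines.backup_and_restore.backup_manager import BackupManager
from gooddata_pipelines.backup_and_restore.models.storage import BackupRestoreConfig

# Placeholder config file and credentials.
config = BackupRestoreConfig.from_yaml("backup_config.yaml")
manager = BackupManager.create(
    config=config, host="https://example.gooddata.com", token="***"
)
# Alternatively, read credentials from a GoodData profiles file:
# manager = BackupManager.create_from_profile(config=config, profile="default")

# Back up workspaces listed in a single-column CSV, whole hierarchies,
# or every workspace in the organization.
manager.backup_workspaces("workspace_ids.csv")
# manager.backup_hierarchies("parent_ids.csv")
# manager.backup_entire_organization()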
gooddata_pipelines/backup_and_restore/constants.py
@@ -0,0 +1,42 @@
+ import datetime
+ from dataclasses import dataclass
+
+ from gooddata_sdk._version import __version__ as sdk_version
+
+
+ @dataclass(frozen=True)
+ class DirNames:
+     """
+     Folder names used in the SDK backup process:
+     - LAYOUTS - GoodData Layouts
+     - LDM - Logical Data Model
+     - AM - Analytics Model
+     - UDF - User Data Filters
+     """
+
+     LAYOUTS = "gooddata_layouts"
+     LDM = "ldm"
+     AM = "analytics_model"
+     UDF = "user_data_filters"
+
+
+ @dataclass(frozen=True)
+ class ConcurrencyDefaults:
+     MAX_WORKERS = 2
+     DEFAULT_BATCH_SIZE = 100
+
+
+ @dataclass(frozen=True)
+ class ApiDefaults:
+     DEFAULT_PAGE_SIZE = 100
+
+
+ @dataclass(frozen=True)
+ class BackupSettings(ConcurrencyDefaults, ApiDefaults):
+     MAX_RETRIES = 3
+     RETRY_DELAY = 5  # seconds
+     TIMESTAMP_SDK_FOLDER = (
+         str(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
+         + "-"
+         + sdk_version.replace(".", "_")
+     )
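
Taken together with BackupManager.process_batch above, these constants imply a retry wait of RETRY_DELAY ** attempt seconds. A quick sketch of the resulting schedule:

# Reproduces the wait_time computed in BackupManager.process_batch:
# wait_time = BackupSettings.RETRY_DELAY ** next_retry
RETRY_DELAY = 5
MAX_RETRIES = 3

for attempt in range(1, MAX_RETRIES + 1):
    print(f"retry {attempt}: wait {RETRY_DELAY ** attempt}s")
# retry 1: wait 5s
# retry 2: wait 25s
# retry 3: wait 125s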
gooddata_pipelines/backup_and_restore/csv_reader.py
@@ -0,0 +1,41 @@
+ # (C) 2025 GoodData Corporation
+
+ import csv
+ from typing import Iterator
+
+
+ class CSVReader:
+     """Class to read the input CSV file and return its content as a list of strings."""
+
+     @staticmethod
+     def read_backup_csv(file_path: str) -> list[str]:
+         """Reads the input CSV file, validates its structure, and returns its
+         content as a list of strings.
+         """
+
+         with open(file_path) as csv_file:
+             reader: Iterator[list[str]] = csv.reader(
+                 csv_file, skipinitialspace=True
+             )
+
+             try:
+                 # Skip the header
+                 headers = next(reader)
+
+                 if len(headers) > 1:
+                     raise ValueError(
+                         "Input file contains more than one column. Please check the input and try again."
+                     )
+
+             except StopIteration:
+                 # Raise an error if the iterator is empty
+                 raise ValueError("No content found in the CSV file.")
+
+             # Read the content
+             content = [row[0] for row in reader]
+
+             # If the content is empty (no rows), raise an error
+             if not content:
+                 raise ValueError("No workspaces found in the CSV file.")
+
+             return content
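
A short sketch of the input shape read_backup_csv expects: a single column with a header row followed by one workspace ID per line. The file name and IDs below are placeholders.

from gooddata_pipelines.backup_and_restore.csv_reader import CSVReader

with open("workspace_ids.csv", "w") as f:
    f.write("workspace_id\n")   # header row is skipped by the reader
    f.write("ws_sales\n")
    f.write("ws_marketing\n")

print(CSVReader.read_backup_csv("workspace_ids.csv"))
# ['ws_sales', 'ws_marketing']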
gooddata_pipelines/backup_and_restore/models/__init__.py
@@ -0,0 +1 @@
+ # (C) 2025 GoodData Corporation
gooddata_pipelines/backup_and_restore/models/input_type.py
@@ -0,0 +1,11 @@
+ # (C) 2025 GoodData Corporation
+
+ from enum import Enum
+
+
+ class InputType(Enum):
+     """Input type for the backup."""
+
+     LIST_OF_WORKSPACES = "list-of-workspaces"
+     HIERARCHY = "list-of-parents"
+     ORGANIZATION = "entire-organization"
gooddata_pipelines/backup_and_restore/models/storage.py
@@ -0,0 +1,58 @@
+ # (C) 2025 GoodData Corporation
+
+ from enum import Enum
+ from typing import Annotated, TypeAlias
+
+ import yaml
+ from pydantic import BaseModel, Field
+
+ from gooddata_pipelines.backup_and_restore.constants import BackupSettings
+
+
+ class StorageType(Enum):
+     """Type of storage."""
+
+     S3 = "s3"
+     LOCAL = "local"
+
+
+ class S3StorageConfig(BaseModel):
+     """Configuration for S3 storage."""
+
+     backup_path: str
+     bucket: str
+     profile: str = "default"
+
+
+ class LocalStorageConfig(BaseModel):
+     """Placeholder for local storage config."""
+
+
+ StorageConfig: TypeAlias = S3StorageConfig | LocalStorageConfig
+
+
+ class BackupRestoreConfig(BaseModel):
+     """Configuration for backup and restore."""
+
+     storage_type: StorageType
+     storage: StorageConfig | None = Field(default=None)
+     api_page_size: Annotated[
+         int,
+         Field(
+             gt=0,
+             description="Page size must be greater than 0",
+         ),
+     ] = Field(default=BackupSettings.DEFAULT_PAGE_SIZE)
+     batch_size: Annotated[
+         int,
+         Field(
+             gt=0,
+             description="Batch size must be greater than 0",
+         ),
+     ] = Field(default=BackupSettings.DEFAULT_BATCH_SIZE)
+
+     @classmethod
+     def from_yaml(cls, conf_path: str) -> "BackupRestoreConfig":
+         with open(conf_path, "r") as stream:
+             conf: dict = yaml.safe_load(stream)
+             return cls(**conf)
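
A hedged example of a configuration file accepted by BackupRestoreConfig.from_yaml; the key names mirror the model fields above, while the bucket, path, and batch values are placeholders.

from gooddata_pipelines.backup_and_restore.models.storage import BackupRestoreConfig

with open("backup_config.yaml", "w") as f:
    f.write(
        "storage_type: s3\n"
        "storage:\n"
        "  backup_path: org-backups/\n"
        "  bucket: my-backup-bucket\n"
        "  profile: default\n"
        "api_page_size: 100\n"
        "batch_size: 50\n"
    )

config = BackupRestoreConfig.from_yaml("backup_config.yaml")
print(config.storage_type, config.batch_size)  # StorageType.S3 50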
gooddata_pipelines/backup_and_restore/models/workspace_response.py
@@ -0,0 +1,51 @@
+ # (C) 2025 GoodData Corporation
+
+ from pydantic import (
+     BaseModel,
+     ConfigDict,
+ )
+ from pydantic.alias_generators import (
+     to_camel,
+ )
+
+
+ class Page(BaseModel):
+     size: int
+     total_elements: int
+     total_pages: int
+     number: int
+
+     model_config = ConfigDict(
+         alias_generator=to_camel,
+         populate_by_name=True,
+     )
+
+
+ class Hierarchy(BaseModel):
+     children_count: int
+
+     model_config = ConfigDict(
+         alias_generator=to_camel,
+         populate_by_name=True,
+     )
+
+
+ class Meta(BaseModel):
+     page: Page | None = None
+     hierarchy: Hierarchy | None = None
+
+
+ class Workspace(BaseModel):
+     id: str
+     meta: Meta | None = None
+
+
+ class Links(BaseModel):
+     self: str
+     next: str | None = None
+
+
+ class WorkspaceResponse(BaseModel):
+     data: list[Workspace]
+     links: Links
+     meta: Meta
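
A small sketch of validating a workspace-listing payload with the models above; the payload is illustrative, not a captured API response.

from gooddata_pipelines.backup_and_restore.models.workspace_response import (
    WorkspaceResponse,
)

payload = {
    "data": [{"id": "ws_sales", "meta": {"hierarchy": {"childrenCount": 2}}}],
    "links": {"self": "https://example.gooddata.com/api/v1/entities/workspaces?page=0"},
    "meta": {"page": {"size": 100, "totalElements": 1, "totalPages": 1, "number": 0}},
}

response = WorkspaceResponse.model_validate(payload)
print(response.data[0].id)                              # ws_sales
print(response.data[0].meta.hierarchy.children_count)   # 2
print(response.meta.page.total_elements)                # 1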
gooddata_pipelines/backup_and_restore/storage/__init__.py
@@ -0,0 +1 @@
+ # (C) 2025 GoodData Corporation
gooddata_pipelines/backup_and_restore/storage/base_storage.py
@@ -0,0 +1,18 @@
+ # (C) 2025 GoodData Corporation
+
+ import abc
+
+ from gooddata_pipelines.backup_and_restore.models.storage import (
+     BackupRestoreConfig,
+ )
+ from gooddata_pipelines.logger import LogObserver
+
+
+ class BackupStorage(abc.ABC):
+     def __init__(self, conf: BackupRestoreConfig):
+         self.logger = LogObserver()
+
+     @abc.abstractmethod
+     def export(self, folder: str, org_id: str) -> None:
+         """Exports the content of the folder to the storage."""
+         raise NotImplementedError
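
The abstract base above defines what a storage backend must provide: a constructor taking the BackupRestoreConfig and an export(folder, org_id) method. A toy subclass, purely illustrative; note that BackupManager.get_storage only wires up the S3 and local backends, so a custom backend would have to be attached to the manager manually.

from gooddata_pipelines.backup_and_restore.models.storage import BackupRestoreConfig
from gooddata_pipelines.backup_and_restore.storage.base_storage import BackupStorage


class DryRunStorage(BackupStorage):
    """Toy backend that only logs what it would upload."""

    def __init__(self, conf: BackupRestoreConfig):
        super().__init__(conf)

    def export(self, folder: str, org_id: str) -> None:
        self.logger.info(f"Would export the contents of {folder} for {org_id}")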
gooddata_pipelines/backup_and_restore/storage/local_storage.py
@@ -0,0 +1,37 @@
+ # (C) 2025 GoodData Corporation
+
+ import shutil
+ from pathlib import Path
+
+ from gooddata_pipelines.backup_and_restore.models.storage import (
+     BackupRestoreConfig,
+ )
+ from gooddata_pipelines.backup_and_restore.storage.base_storage import (
+     BackupStorage,
+ )
+
+
+ class LocalStorage(BackupStorage):
+     def __init__(self, conf: BackupRestoreConfig):
+         super().__init__(conf)
+
+     def _export(
+         self, folder: str, org_id: str, export_folder: str = "local_backups"
+     ) -> None:
+         """Copies the content of the folder to local storage as backup."""
+         self.logger.info(f"Saving {org_id} to local storage")
+         shutil.copytree(
+             Path(folder), Path(Path.cwd(), export_folder), dirs_exist_ok=True
+         )
+
+     def export(
+         self, folder: str, org_id: str, export_folder: str = "local_backups"
+     ) -> None:
+         """Copies the content of the folder to local storage as backup."""
+         try:
+             self._export(folder, org_id, export_folder)
+         except Exception as e:
+             self.logger.error(
+                 f"Error exporting {folder} to {export_folder}: {e}"
+             )
+             raise