gooddata-pipelines 1.47.1.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of gooddata-pipelines might be problematic. Click here for more details.

Files changed (54):
  1. gooddata_pipelines/__init__.py +59 -0
  2. gooddata_pipelines/_version.py +7 -0
  3. gooddata_pipelines/api/__init__.py +5 -0
  4. gooddata_pipelines/api/exceptions.py +41 -0
  5. gooddata_pipelines/api/gooddata_api.py +309 -0
  6. gooddata_pipelines/api/gooddata_api_wrapper.py +36 -0
  7. gooddata_pipelines/api/gooddata_sdk.py +374 -0
  8. gooddata_pipelines/api/utils.py +43 -0
  9. gooddata_pipelines/backup_and_restore/__init__.py +1 -0
  10. gooddata_pipelines/backup_and_restore/backup_input_processor.py +195 -0
  11. gooddata_pipelines/backup_and_restore/backup_manager.py +430 -0
  12. gooddata_pipelines/backup_and_restore/constants.py +42 -0
  13. gooddata_pipelines/backup_and_restore/csv_reader.py +41 -0
  14. gooddata_pipelines/backup_and_restore/models/__init__.py +1 -0
  15. gooddata_pipelines/backup_and_restore/models/input_type.py +11 -0
  16. gooddata_pipelines/backup_and_restore/models/storage.py +58 -0
  17. gooddata_pipelines/backup_and_restore/models/workspace_response.py +51 -0
  18. gooddata_pipelines/backup_and_restore/storage/__init__.py +1 -0
  19. gooddata_pipelines/backup_and_restore/storage/base_storage.py +18 -0
  20. gooddata_pipelines/backup_and_restore/storage/local_storage.py +37 -0
  21. gooddata_pipelines/backup_and_restore/storage/s3_storage.py +71 -0
  22. gooddata_pipelines/logger/__init__.py +8 -0
  23. gooddata_pipelines/logger/logger.py +115 -0
  24. gooddata_pipelines/provisioning/__init__.py +31 -0
  25. gooddata_pipelines/provisioning/assets/wdf_setting.json +14 -0
  26. gooddata_pipelines/provisioning/entities/__init__.py +1 -0
  27. gooddata_pipelines/provisioning/entities/user_data_filters/__init__.py +1 -0
  28. gooddata_pipelines/provisioning/entities/user_data_filters/models/__init__.py +1 -0
  29. gooddata_pipelines/provisioning/entities/user_data_filters/models/udf_models.py +32 -0
  30. gooddata_pipelines/provisioning/entities/user_data_filters/user_data_filters.py +221 -0
  31. gooddata_pipelines/provisioning/entities/users/__init__.py +1 -0
  32. gooddata_pipelines/provisioning/entities/users/models/__init__.py +1 -0
  33. gooddata_pipelines/provisioning/entities/users/models/permissions.py +242 -0
  34. gooddata_pipelines/provisioning/entities/users/models/user_groups.py +64 -0
  35. gooddata_pipelines/provisioning/entities/users/models/users.py +114 -0
  36. gooddata_pipelines/provisioning/entities/users/permissions.py +153 -0
  37. gooddata_pipelines/provisioning/entities/users/user_groups.py +212 -0
  38. gooddata_pipelines/provisioning/entities/users/users.py +179 -0
  39. gooddata_pipelines/provisioning/entities/workspaces/__init__.py +1 -0
  40. gooddata_pipelines/provisioning/entities/workspaces/models.py +78 -0
  41. gooddata_pipelines/provisioning/entities/workspaces/workspace.py +263 -0
  42. gooddata_pipelines/provisioning/entities/workspaces/workspace_data_filters.py +286 -0
  43. gooddata_pipelines/provisioning/entities/workspaces/workspace_data_parser.py +123 -0
  44. gooddata_pipelines/provisioning/entities/workspaces/workspace_data_validator.py +188 -0
  45. gooddata_pipelines/provisioning/provisioning.py +132 -0
  46. gooddata_pipelines/provisioning/utils/__init__.py +1 -0
  47. gooddata_pipelines/provisioning/utils/context_objects.py +32 -0
  48. gooddata_pipelines/provisioning/utils/exceptions.py +95 -0
  49. gooddata_pipelines/provisioning/utils/utils.py +80 -0
  50. gooddata_pipelines/py.typed +0 -0
  51. gooddata_pipelines-1.47.1.dev1.dist-info/METADATA +85 -0
  52. gooddata_pipelines-1.47.1.dev1.dist-info/RECORD +54 -0
  53. gooddata_pipelines-1.47.1.dev1.dist-info/WHEEL +4 -0
  54. gooddata_pipelines-1.47.1.dev1.dist-info/licenses/LICENSE.txt +1 -277
@@ -0,0 +1,374 @@
1
+ # (C) 2025 GoodData Corporation
2
+
3
+ """Interaction with GoodData Cloud via the Gooddata Python SDK."""
4
+
5
+ from pathlib import Path
6
+ from typing import Callable
7
+
8
+ from gooddata_sdk.catalog.permission.declarative_model.permission import (
9
+ CatalogDeclarativeWorkspacePermissions,
10
+ )
11
+ from gooddata_sdk.catalog.user.entity_model.user import CatalogUser
12
+ from gooddata_sdk.catalog.user.entity_model.user_group import CatalogUserGroup
13
+ from gooddata_sdk.catalog.workspace.declarative_model.workspace.workspace import (
14
+ CatalogDeclarativeWorkspaceDataFilters,
15
+ )
16
+ from gooddata_sdk.catalog.workspace.entity_model.user_data_filter import (
17
+ CatalogUserDataFilter,
18
+ )
19
+ from gooddata_sdk.catalog.workspace.entity_model.workspace import (
20
+ CatalogWorkspace,
21
+ )
22
+ from gooddata_sdk.sdk import GoodDataSdk
23
+
24
+ from gooddata_pipelines.api.utils import raise_with_context
25
+
26
+
27
def apply_to_all_methods(decorator: Callable) -> Callable:
    """Build a class decorator that wraps every non-dunder callable of a class.

    Only attributes defined directly on the class (its own ``__dict__``) are
    considered; attributes whose name starts with ``__`` are left untouched.

    Args:
        decorator (Callable): Function decorator applied to each callable
            attribute found on the class.
    Returns:
        Callable: A class decorator that rewrites the class in place and
            returns the same class object.
    """

    def decorate(cls: type) -> type:
        # Iterate over a snapshot of the attribute names: ``setattr`` below
        # writes into the class namespace, and mutating a mapping while
        # walking its live view is fragile.
        for attr in list(cls.__dict__):
            if attr.startswith("__"):
                continue
            # Resolve the attribute once so the object that is tested for
            # callability is the same object that gets wrapped.
            member = getattr(cls, attr)
            if callable(member):
                setattr(cls, attr, decorator(member))
        return cls

    return decorate
35
+
36
+
37
@apply_to_all_methods(raise_with_context())
class SdkMethods:
    """
    Thin wrappers used to interact with GoodData Cloud via the GoodData
    Python SDK.

    The class decorator passes every non-dunder callable defined on this
    class through ``raise_with_context()``.
    """

    _sdk: GoodDataSdk

    def get_organization_id(self) -> str:
        """Return the organization ID exposed by the SDK organization catalog."""
        organization_service = self._sdk.catalog_organization
        return organization_service.organization_id

    def check_workspace_exists(self, workspace_id: str) -> bool:
        """
        Report whether a workspace can be fetched through the SDK.

        Args:
            workspace_id (str): The ID of the workspace to look up.
        Returns:
            bool: True when the lookup succeeds, False when the lookup raises
                any exception (the exception itself is discarded).
        """
        try:
            self._sdk.catalog_workspace.get_workspace(workspace_id)
        except Exception:
            # Any failure of the lookup is reported as "does not exist".
            return False
        else:
            return True

    def get_workspace(self, workspace_id: str, **_: str) -> CatalogWorkspace:
        """
        Fetch a single workspace by ID through the GoodData Python SDK.

        Args:
            workspace_id (str): The ID of the workspace to retrieve.
            **_ (str): Extra keyword arguments; accepted and ignored here.
        Returns:
            CatalogWorkspace: The workspace object returned by the SDK.
        Raises:
            GoodDataApiException: If the workspace cannot be retrieved; the
                exception carries additional context information.
        """
        workspace_service = self._sdk.catalog_workspace
        return workspace_service.get_workspace(workspace_id)

    def delete_panther_workspace(self, workspace_id: str) -> None:
        """
        Delete a workspace by ID through the GoodData Python SDK.

        Args:
            workspace_id (str): The ID of the workspace to delete.
        Raises:
            GoodDataApiException: If the workspace cannot be deleted; the
                exception carries additional context information.
        """
        workspace_service = self._sdk.catalog_workspace
        workspace_service.delete_workspace(workspace_id)

    def create_or_update_panther_workspace(
        self,
        workspace_id: str,
        workspace_name: str,
        parent_id: str | None,
        **_: str,
    ) -> None:
        """
        Create or update a workspace with the given ID, name and parent ID.

        Args:
            workspace_id (str): The ID of the workspace to create or update.
            workspace_name (str): The name of the workspace.
            parent_id (str | None): The ID of the parent workspace, if any.
            **_ (str): Extra keyword arguments; accepted and ignored here.
        Returns:
            None
        Raises:
            GoodDataApiException: If the workspace cannot be created or
                updated; the exception carries additional context information.
        """
        workspace = CatalogWorkspace(
            workspace_id=workspace_id,
            name=workspace_name,
            parent_id=parent_id,
        )
        return self._sdk.catalog_workspace.create_or_update(workspace)

    def get_panther_children_workspaces(
        self, parent_workspace_ids: set[str]
    ) -> list[CatalogWorkspace]:
        """
        List every workspace and keep those whose parent ID is in the given set.

        Args:
            parent_workspace_ids (set[str]): Parent workspace IDs used to
                filter the full workspace listing.
        Returns:
            list[CatalogWorkspace]: Workspaces whose ``parent_id`` is one of
                ``parent_workspace_ids``.
        """
        matching: list[CatalogWorkspace] = []
        for candidate in self.list_workspaces():
            if candidate.parent_id in parent_workspace_ids:
                matching.append(candidate)
        return matching

    def list_workspaces(self) -> list[CatalogWorkspace]:
        """Retrieve all workspaces through the GoodData Python SDK.

        Returns:
            list[CatalogWorkspace]: The workspaces returned by the SDK.
        Raises:
            GoodDataApiException: If the workspaces cannot be retrieved; the
                exception carries additional context information.
        """
        workspace_service = self._sdk.catalog_workspace
        return workspace_service.list_workspaces()

    def get_declarative_permissions(
        self, workspace_id: str
    ) -> CatalogDeclarativeWorkspacePermissions:
        """
        Retrieve the declarative permissions of a workspace.

        Args:
            workspace_id (str): The ID of the workspace whose permissions
                should be retrieved.
        Returns:
            CatalogDeclarativeWorkspacePermissions: The declarative
                permissions of the workspace.
        Raises:
            GoodDataApiException: If the permissions cannot be retrieved; the
                exception carries additional context information.
        """
        permission_service = self._sdk.catalog_permission
        return permission_service.get_declarative_permissions(workspace_id)

    def put_declarative_permissions(
        self,
        workspace_id: str,
        ws_permissions: CatalogDeclarativeWorkspacePermissions,
    ) -> None:
        """
        Replace the declarative permissions of a workspace.

        Args:
            workspace_id (str): The ID of the workspace whose permissions
                should be updated.
            ws_permissions (CatalogDeclarativeWorkspacePermissions): The new
                declarative permissions to set for the workspace.
        Returns:
            None
        Raises:
            GoodDataApiException: If the permissions cannot be updated; the
                exception carries additional context information.
        """
        permission_service = self._sdk.catalog_permission
        return permission_service.put_declarative_permissions(
            workspace_id, ws_permissions
        )

    def get_user(self, user_id: str, **_: str) -> CatalogUser:
        """
        Fetch a single user by ID through the GoodData Python SDK.

        Args:
            user_id (str): The ID of the user to retrieve.
            **_ (str): Extra keyword arguments; accepted and ignored here.
        Returns:
            CatalogUser: The user object returned by the SDK.
        Raises:
            GoodDataApiException: If the user cannot be retrieved; the
                exception carries additional context information.
        """
        user_service = self._sdk.catalog_user
        return user_service.get_user(user_id)

    def create_or_update_user(self, user: CatalogUser, **_: str) -> None:
        """
        Create or update a user through the GoodData Python SDK.

        Args:
            user (CatalogUser): The user object to create or update.
            **_ (str): Extra keyword arguments; accepted and ignored here.
        Returns:
            None
        Raises:
            GoodDataApiException: If the user cannot be created or updated;
                the exception carries additional context information.
        """
        user_service = self._sdk.catalog_user
        return user_service.create_or_update_user(user)

    def delete_user(self, user_id: str, **_: str) -> None:
        """
        Delete a user by ID through the GoodData Python SDK.

        Args:
            user_id (str): The ID of the user to delete.
            **_ (str): Extra keyword arguments; accepted and ignored here.
        Returns:
            None
        Raises:
            GoodDataApiException: If the user cannot be deleted; the
                exception carries additional context information.
        """
        user_service = self._sdk.catalog_user
        return user_service.delete_user(user_id)

    def get_user_group(self, user_group_id: str, **_: str) -> CatalogUserGroup:
        """
        Fetch a single user group by ID through the GoodData Python SDK.

        Args:
            user_group_id (str): The ID of the user group to retrieve.
            **_ (str): Extra keyword arguments; accepted and ignored here.
        Returns:
            CatalogUserGroup: The user group object returned by the SDK.
        Raises:
            GoodDataApiException: If the user group cannot be retrieved; the
                exception carries additional context information.
        """
        user_service = self._sdk.catalog_user
        return user_service.get_user_group(user_group_id)

    def list_user_groups(self) -> list[CatalogUserGroup]:
        """
        Retrieve all user groups through the GoodData Python SDK.

        Returns:
            list[CatalogUserGroup]: The user groups returned by the SDK.
        Raises:
            GoodDataApiException: If the user groups cannot be retrieved; the
                exception carries additional context information.
        """
        user_service = self._sdk.catalog_user
        return user_service.list_user_groups()

    def list_users(self) -> list[CatalogUser]:
        """Retrieve all users through the GoodData Python SDK.

        Returns:
            list[CatalogUser]: The users returned by the SDK.
        """
        user_service = self._sdk.catalog_user
        return user_service.list_users()

    def create_or_update_user_group(
        self, catalog_user_group: CatalogUserGroup, **_: str
    ) -> None:
        """Create or update a user group through the GoodData Python SDK.

        Args:
            catalog_user_group (CatalogUserGroup): The user group object to
                create or update.
            **_ (str): Extra keyword arguments; accepted and ignored here.
        Returns:
            None
        Raises:
            GoodDataApiException: If the user group cannot be created or
                updated; the exception carries additional context information.
        """
        user_service = self._sdk.catalog_user
        return user_service.create_or_update_user_group(catalog_user_group)

    def delete_user_group(self, user_group_id: str) -> None:
        """Delete a user group by ID through the GoodData Python SDK.

        Args:
            user_group_id (str): The ID of the user group to delete.
        Returns:
            None
        Raises:
            GoodDataApiException: If the user group cannot be deleted; the
                exception carries additional context information.
        """
        user_service = self._sdk.catalog_user
        return user_service.delete_user_group(user_group_id)

    def get_declarative_workspace_data_filters(
        self,
    ) -> CatalogDeclarativeWorkspaceDataFilters:
        """Retrieve the declarative workspace data filters.

        Returns:
            CatalogDeclarativeWorkspaceDataFilters: The declarative workspace
                data filters returned by the SDK.
        Raises:
            GoodDataApiException: If the declarative workspace data filters
                cannot be retrieved; the exception carries additional context
                information.
        """
        workspace_service = self._sdk.catalog_workspace
        return workspace_service.get_declarative_workspace_data_filters()

    def list_user_data_filters(
        self, workspace_id: str
    ) -> list[CatalogUserDataFilter]:
        """List all user data filters of a workspace.

        Args:
            workspace_id (str): The ID of the workspace whose user data
                filters should be listed.
        Returns:
            list[CatalogUserDataFilter]: The user data filters of the
                specified workspace.
        Raises:
            GoodDataApiException: If the user data filters cannot be listed;
                the exception carries additional context information.
        """
        workspace_service = self._sdk.catalog_workspace
        return workspace_service.list_user_data_filters(workspace_id)

    def delete_user_data_filter(
        self, workspace_id: str, user_data_filter_id: str
    ) -> None:
        """Delete a user data filter by ID in the specified workspace.

        Args:
            workspace_id (str): The ID of the workspace containing the user
                data filter.
            user_data_filter_id (str): The ID of the user data filter to
                delete.
        Returns:
            None
        Raises:
            GoodDataApiException: If the user data filter cannot be deleted;
                the exception carries additional context information.
        """
        workspace_service = self._sdk.catalog_workspace
        workspace_service.delete_user_data_filter(
            workspace_id, user_data_filter_id
        )

    def create_or_update_user_data_filter(
        self, workspace_id: str, user_data_filter: CatalogUserDataFilter
    ) -> None:
        """Create or update a user data filter in the specified workspace.

        Args:
            workspace_id (str): The ID of the workspace where the user data
                filter should be created or updated.
            user_data_filter (CatalogUserDataFilter): The user data filter
                object to create or update.
        Returns:
            None
        Raises:
            GoodDataApiException: If the user data filter cannot be created
                or updated; the exception carries additional context
                information.
        """
        workspace_service = self._sdk.catalog_workspace
        workspace_service.create_or_update_user_data_filter(
            workspace_id, user_data_filter
        )

    def store_declarative_workspace(
        self, workspace_id: str, export_path: Path
    ) -> None:
        """Store the declarative workspace in the specified export path.

        Args:
            workspace_id (str): The ID of the workspace to store.
            export_path (Path): Path handed to the SDK as the export target.
        """
        workspace_service = self._sdk.catalog_workspace
        workspace_service.store_declarative_workspace(workspace_id, export_path)

    def store_declarative_filter_views(
        self, workspace_id: str, export_path: Path
    ) -> None:
        """Store the declarative filter views in the specified export path.

        Args:
            workspace_id (str): The ID of the workspace whose filter views
                should be stored.
            export_path (Path): Path handed to the SDK as the export target.
        """
        workspace_service = self._sdk.catalog_workspace
        workspace_service.store_declarative_filter_views(
            workspace_id, export_path
        )
@@ -0,0 +1,43 @@
1
+ # (C) 2025 GoodData Corporation
2
+
3
+ """Utility functions for GoodData Cloud API interactions."""
4
+
5
+ from typing import Any, Callable
6
+
7
+ from gooddata_api_client import ApiException # type: ignore
8
+
9
+ from gooddata_pipelines.api.exceptions import GoodDataApiException
10
+
11
+
12
def raise_with_context(**context_kwargs: str) -> Callable:
    """
    Decorator factory that catches exceptions raised by SDK methods and raises
    a GoodDataApiException with additional context information.

    The context passed to the exception is assembled per failing call from the
    static ``context_kwargs``, the keyword arguments of the failing call and,
    for ``ApiException``, an ``http_status`` entry.

    Args:
        context_kwargs (dict): Additional context information to include in the
            GoodDataApiException.
    Returns:
        Callable: A decorator that wraps a function with the error handling
            described above.
    Raises:
        GoodDataApiException: Raised by the wrapped function whenever the
            original function raises any ``Exception``; the original exception
            is attached as the cause.
    """
    # Imported locally so the block does not depend on a module-level import.
    import functools

    def decorator(fn: Callable) -> Callable:
        # Preserve the wrapped function's name and docstring on the wrapper.
        @functools.wraps(fn)
        def wrapper(*method_args: Any, **method_kwargs: Any) -> Any:
            try:
                return fn(*method_args, **method_kwargs)
            except Exception as e:
                # Build a fresh context for this failure. The decorator-level
                # ``context_kwargs`` dict is shared by every wrapped function,
                # so it must never be mutated here; merging also avoids a
                # TypeError when a call kwarg has the same name as a context
                # key.
                context: dict[str, Any] = {**context_kwargs, **method_kwargs}

                # Process known exceptions
                if isinstance(e, ApiException):
                    context["http_status"] = f"{e.status} {e.reason}"
                    exception_content = e.body
                else:
                    exception_content = str(e)

                # Format the exception message: "{exception_type}: {exception_content}"
                message = f"{type(e).__name__}: {exception_content}"

                raise GoodDataApiException(message, **context) from e

        return wrapper

    return decorator
@@ -0,0 +1 @@
1
+ # (C) 2025 GoodData Corporation
@@ -0,0 +1,195 @@
1
+ # (C) 2025 GoodData Corporation
2
+
3
+ from dataclasses import dataclass
4
+
5
+ import requests
6
+
7
+ from gooddata_pipelines.api import GoodDataApi
8
+ from gooddata_pipelines.api.gooddata_api import API_VERSION
9
+ from gooddata_pipelines.backup_and_restore.csv_reader import CSVReader
10
+ from gooddata_pipelines.backup_and_restore.models.input_type import InputType
11
+ from gooddata_pipelines.backup_and_restore.models.workspace_response import (
12
+ Workspace,
13
+ WorkspaceResponse,
14
+ )
15
+ from gooddata_pipelines.logger import LogObserver
16
+
17
+
18
class BackupInputProcessor:
    """Class to handle the input CSV and prepare the actual input for the backup.

    Based on the InputType value, this class will determine which approach to take
    in getting the IDs of workspaces to backup. It will then call appropriate
    GoodData Cloud endpoints to get the IDs and return them as a list.
    """

    _api: GoodDataApi
    base_workspace_endpoint: str
    hierarchy_endpoint: str
    all_workspaces_endpoint: str

    def __init__(self, api: GoodDataApi, page_size: int) -> None:
        """Store the API client and page size and prepare the endpoint URLs.

        Args:
            api (GoodDataApi): API client used to fetch workspace pages.
            page_size (int): Page size placed into the listing endpoint URLs.
        """
        self._api = api
        self.page_size = page_size
        self.logger = LogObserver()
        self.csv_reader = CSVReader()

        self.set_endpoints()

    def set_endpoints(self) -> None:
        """Sets the workspace listing endpoints used by this processor."""
        # Built from API_VERSION so the prefix always matches the marker that
        # ``fetch_page`` strips off again.
        self.base_workspace_endpoint = f"/api/{API_VERSION}/entities/workspaces"
        paging = f"page=0&size={self.page_size}&sort=name,asc"
        # The middle segment is a plain (non-f) string on purpose: it keeps the
        # ``{parent_id}`` placeholder for ``str.format`` in ``get_hierarchy``.
        self.hierarchy_endpoint = (
            f"{self.base_workspace_endpoint}?"
            + "filter=parent.id=={parent_id}"
            + f"&include=parent&{paging}&metaInclude=page,hierarchy"
        )
        self.all_workspaces_endpoint = (
            f"{self.base_workspace_endpoint}?{paging}&metaInclude=page"
        )

    @dataclass
    class _ProcessDataOutput:
        # IDs of all workspaces found on one page.
        workspace_ids: list[str]
        # IDs of the workspaces on that page that have children themselves.
        sub_parents: list[str] | None = None

    def fetch_page(self, url: str) -> WorkspaceResponse:
        """Fetch a page of workspaces.

        Args:
            url (str): URL (or path) containing ``api/{API_VERSION}``.
        Returns:
            WorkspaceResponse: The parsed JSON body of the response.
        Raises:
            RuntimeError: If the URL does not contain the API prefix or the
                response is not OK.
        """
        # Separate the API path from the URL so that it can be fed to the api class.
        # ``partition`` keeps everything after the first marker and lets us
        # report a missing marker explicitly instead of an IndexError.
        _, marker, endpoint = url.partition(f"api/{API_VERSION}")
        if not marker:
            raise RuntimeError(
                f"Unexpected URL, cannot determine the API endpoint: {url}"
            )

        response: requests.Response = self._api._get(endpoint)
        if not response.ok:
            raise RuntimeError(
                f"Failed to fetch data from the API. URL: {endpoint}"
            )
        return WorkspaceResponse(**response.json())

    @staticmethod
    def process_data(data: list[Workspace]) -> _ProcessDataOutput:
        """Extract children and sub-parents from workspace data.

        Args:
            data (list[Workspace]): Workspaces from one API page.
        Returns:
            _ProcessDataOutput: All workspace IDs on the page, plus the IDs of
                those whose hierarchy metadata reports at least one child.
        """
        children: list[str] = []
        sub_parents: list[str] = []

        for workspace in data:
            # append child workspace IDs
            children.append(workspace.id)

            # if hierarchy is present and has children, append child workspace ID to sub_parents
            if workspace.meta and workspace.meta.hierarchy:
                if workspace.meta.hierarchy.children_count > 0:
                    sub_parents.append(workspace.id)
        return BackupInputProcessor._ProcessDataOutput(children, sub_parents)

    def log_paging_progress(self, response: WorkspaceResponse) -> None:
        """Log the progress of paging through API responses if pagination data is present."""
        page = response.meta.page
        if not page:
            return

        # Page numbers in the response are zero-based; log them one-based.
        current_page: int = page.number + 1
        total_pages: int | None = page.total_pages

        if current_page and total_pages:
            self.logger.info(f"Fetched page: {current_page} of {total_pages}")

    def _paginate(
        self, url: str | None
    ) -> list["BackupInputProcessor._ProcessDataOutput"]:
        """Follow ``links.next`` from ``url`` and collect the processed pages.

        Args:
            url (str | None): URL of the first page; a falsy value yields an
                empty result.
        Returns:
            list[_ProcessDataOutput]: One entry per fetched page.
        """
        result: list[BackupInputProcessor._ProcessDataOutput] = []
        while url:
            response: WorkspaceResponse = self.fetch_page(url)
            self.log_paging_progress(response)
            result.append(self.process_data(response.data))
            url = response.links.next

        return result

    def get_hierarchy(self, parent_id: str) -> list[str]:
        """Returns a list of workspace IDs in the hierarchy.

        The direct children of ``parent_id`` come first, followed by the
        descendants of every child that has children itself (resolved
        recursively). ``parent_id`` is not part of the result.
        """
        self.logger.info(f"Fetching children of {parent_id}")
        url = self.hierarchy_endpoint.format(parent_id=parent_id)

        all_children: list[str] = []
        sub_parents: list[str] = []

        for page in self._paginate(url):
            all_children.extend(page.workspace_ids)
            if page.sub_parents:
                sub_parents.extend(page.sub_parents)

        for subparent in sub_parents:
            all_children += self.get_hierarchy(subparent)

        if not all_children:
            self.logger.warning(
                f"No child workspaces found for parent workspace ID: {parent_id}"
            )

        return all_children

    def get_all_workspaces(self) -> list[str]:
        """Returns a list of all workspace IDs in the organization."""
        # TODO: can be optimized - requests can be sent asynchronously.
        # Use the total number of pages to calculate the number of requests
        # to be sent. Use semaphore or otherwise limit the number of concurrent
        # requests to avoid putting too much load on the server.
        self.logger.info("Fetching all workspaces")

        all_workspaces: list[str] = []
        for page in self._paginate(self.all_workspaces_endpoint):
            all_workspaces.extend(page.workspace_ids)

        if not all_workspaces:
            self.logger.warning("No workspaces found in the organization.")

        return all_workspaces

    def get_ids_to_backup(
        self, input_type: InputType, path_to_csv: str | None = None
    ) -> list[str]:
        """Returns the list of workspace IDs to back up based on the input type.

        Args:
            input_type (InputType): Selects how the workspace IDs are gathered.
            path_to_csv (str | None): Path to the input CSV; required for the
                LIST_OF_WORKSPACES and HIERARCHY input types.
        Returns:
            list[str]: Workspace IDs to back up.
        Raises:
            ValueError: If the CSV path is missing for an input type that
                needs it, or if the input type is not supported.
        """
        # If we're backing up the entire organization, we simply get all workspaces
        if input_type == InputType.ORGANIZATION:
            return self.get_all_workspaces()

        if input_type not in (InputType.LIST_OF_WORKSPACES, InputType.HIERARCHY):
            # This path should be unreachable as long as the conditions above
            # exhaustively check all values of InputType Enum.
            raise ValueError(f"Unsupported input type: {input_type.value}")

        if path_to_csv is None:
            raise ValueError(
                f"Path to CSV is required for this input type: {input_type.value}"
            )

        # If we're backing up based on the list, simply read it from the CSV
        if input_type == InputType.LIST_OF_WORKSPACES:
            return self.csv_reader.read_backup_csv(path_to_csv)

        # For hierarchy backup, we read the CSV and treat it as a list of
        # parent workspace IDs. Then we retrieve the children of each parent,
        # including their children, and so on. The parent workspaces are
        # also included in the backup.
        list_of_parents = self.csv_reader.read_backup_csv(path_to_csv)
        list_of_children: list[str] = []

        for parent in list_of_parents:
            list_of_children.extend(self.get_hierarchy(parent))

        # A parent listed in the CSV may also be a descendant of another listed
        # parent; drop repeated IDs (keeping first-seen order) so no workspace
        # is returned twice.
        return list(dict.fromkeys(list_of_parents + list_of_children))