gooddata-pipelines 1.47.1.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of gooddata-pipelines might be problematic. Click here for more details.

Files changed (54) hide show
  1. gooddata_pipelines/__init__.py +59 -0
  2. gooddata_pipelines/_version.py +7 -0
  3. gooddata_pipelines/api/__init__.py +5 -0
  4. gooddata_pipelines/api/exceptions.py +41 -0
  5. gooddata_pipelines/api/gooddata_api.py +309 -0
  6. gooddata_pipelines/api/gooddata_api_wrapper.py +36 -0
  7. gooddata_pipelines/api/gooddata_sdk.py +374 -0
  8. gooddata_pipelines/api/utils.py +43 -0
  9. gooddata_pipelines/backup_and_restore/__init__.py +1 -0
  10. gooddata_pipelines/backup_and_restore/backup_input_processor.py +195 -0
  11. gooddata_pipelines/backup_and_restore/backup_manager.py +430 -0
  12. gooddata_pipelines/backup_and_restore/constants.py +42 -0
  13. gooddata_pipelines/backup_and_restore/csv_reader.py +41 -0
  14. gooddata_pipelines/backup_and_restore/models/__init__.py +1 -0
  15. gooddata_pipelines/backup_and_restore/models/input_type.py +11 -0
  16. gooddata_pipelines/backup_and_restore/models/storage.py +58 -0
  17. gooddata_pipelines/backup_and_restore/models/workspace_response.py +51 -0
  18. gooddata_pipelines/backup_and_restore/storage/__init__.py +1 -0
  19. gooddata_pipelines/backup_and_restore/storage/base_storage.py +18 -0
  20. gooddata_pipelines/backup_and_restore/storage/local_storage.py +37 -0
  21. gooddata_pipelines/backup_and_restore/storage/s3_storage.py +71 -0
  22. gooddata_pipelines/logger/__init__.py +8 -0
  23. gooddata_pipelines/logger/logger.py +115 -0
  24. gooddata_pipelines/provisioning/__init__.py +31 -0
  25. gooddata_pipelines/provisioning/assets/wdf_setting.json +14 -0
  26. gooddata_pipelines/provisioning/entities/__init__.py +1 -0
  27. gooddata_pipelines/provisioning/entities/user_data_filters/__init__.py +1 -0
  28. gooddata_pipelines/provisioning/entities/user_data_filters/models/__init__.py +1 -0
  29. gooddata_pipelines/provisioning/entities/user_data_filters/models/udf_models.py +32 -0
  30. gooddata_pipelines/provisioning/entities/user_data_filters/user_data_filters.py +221 -0
  31. gooddata_pipelines/provisioning/entities/users/__init__.py +1 -0
  32. gooddata_pipelines/provisioning/entities/users/models/__init__.py +1 -0
  33. gooddata_pipelines/provisioning/entities/users/models/permissions.py +242 -0
  34. gooddata_pipelines/provisioning/entities/users/models/user_groups.py +64 -0
  35. gooddata_pipelines/provisioning/entities/users/models/users.py +114 -0
  36. gooddata_pipelines/provisioning/entities/users/permissions.py +153 -0
  37. gooddata_pipelines/provisioning/entities/users/user_groups.py +212 -0
  38. gooddata_pipelines/provisioning/entities/users/users.py +179 -0
  39. gooddata_pipelines/provisioning/entities/workspaces/__init__.py +1 -0
  40. gooddata_pipelines/provisioning/entities/workspaces/models.py +78 -0
  41. gooddata_pipelines/provisioning/entities/workspaces/workspace.py +263 -0
  42. gooddata_pipelines/provisioning/entities/workspaces/workspace_data_filters.py +286 -0
  43. gooddata_pipelines/provisioning/entities/workspaces/workspace_data_parser.py +123 -0
  44. gooddata_pipelines/provisioning/entities/workspaces/workspace_data_validator.py +188 -0
  45. gooddata_pipelines/provisioning/provisioning.py +132 -0
  46. gooddata_pipelines/provisioning/utils/__init__.py +1 -0
  47. gooddata_pipelines/provisioning/utils/context_objects.py +32 -0
  48. gooddata_pipelines/provisioning/utils/exceptions.py +95 -0
  49. gooddata_pipelines/provisioning/utils/utils.py +80 -0
  50. gooddata_pipelines/py.typed +0 -0
  51. gooddata_pipelines-1.47.1.dev1.dist-info/METADATA +85 -0
  52. gooddata_pipelines-1.47.1.dev1.dist-info/RECORD +54 -0
  53. gooddata_pipelines-1.47.1.dev1.dist-info/WHEEL +4 -0
  54. gooddata_pipelines-1.47.1.dev1.dist-info/licenses/LICENSE.txt +1 -277
@@ -0,0 +1,263 @@
1
+ # (C) 2025 GoodData Corporation
2
+ """Module for provisioning workspaces in GoodData Cloud."""
3
+
4
+ from typing import Literal
5
+
6
+ from gooddata_sdk.catalog.workspace.entity_model.workspace import (
7
+ CatalogWorkspace,
8
+ )
9
+
10
+ from gooddata_pipelines.api.exceptions import GoodDataApiException
11
+ from gooddata_pipelines.provisioning.entities.workspaces.models import (
12
+ WorkspaceDataMaps,
13
+ WorkspaceFullLoad,
14
+ WorkspaceIncrementalLoad,
15
+ )
16
+ from gooddata_pipelines.provisioning.entities.workspaces.workspace_data_filters import (
17
+ WorkspaceDataFilterManager,
18
+ )
19
+ from gooddata_pipelines.provisioning.entities.workspaces.workspace_data_parser import (
20
+ WorkspaceDataParser,
21
+ )
22
+ from gooddata_pipelines.provisioning.entities.workspaces.workspace_data_validator import (
23
+ WorkspaceDataValidator,
24
+ )
25
+ from gooddata_pipelines.provisioning.provisioning import Provisioning
26
+ from gooddata_pipelines.provisioning.utils.context_objects import (
27
+ WorkspaceContext,
28
+ )
29
+ from gooddata_pipelines.provisioning.utils.exceptions import WorkspaceException
30
+
31
+
32
class WorkspaceProvisioner(
    Provisioning[WorkspaceFullLoad, WorkspaceIncrementalLoad]
):
    """Provisions child workspaces in GoodData Cloud (Panther) from source data.

    A full load creates, updates, and deletes child workspaces so that the
    upstream state matches the source exactly, and reconciles the workspace
    data filter (WDF) settings of every source workspace.
    """

    # Populated by the Provisioning base class from the loaded source data.
    source_group_full: list[WorkspaceFullLoad]
    source_group_incremental: list[WorkspaceIncrementalLoad]

    def __init__(self, *args: str, **kwargs: str) -> None:
        """Creates an instance of the WorkspaceProvisioner.

        Calls the superclass constructor and initializes the validator, parser,
        and maps for workspace data.
        """
        super().__init__(*args, **kwargs)
        self.validator: WorkspaceDataValidator = WorkspaceDataValidator(
            self._api
        )
        self.parser: WorkspaceDataParser = WorkspaceDataParser()
        self.maps: WorkspaceDataMaps = WorkspaceDataMaps()

    def _find_workspaces_to_update(
        self,
        source_group: list[WorkspaceFullLoad],
        panther_group: list[CatalogWorkspace],
        ids_in_both_systems: set[str],
    ) -> set[str]:
        """Returns IDs of workspaces whose name differs between systems.

        Inspects existing Panther workspaces and compares them to workspaces
        from the source database. If the ID exists in both systems but the
        workspace name in GoodData Cloud is different from the source, the
        workspace will be updated. The rest of the workspaces are ignored.
        """
        existing_workspaces: dict[str, CatalogWorkspace] = {
            workspace.id: workspace for workspace in panther_group
        }

        ids_to_update: set[str] = set()

        for source_workspace in source_group:
            source_id = source_workspace.workspace_id

            if source_id not in ids_in_both_systems:
                continue

            # Single dict lookup instead of a truthiness .get() followed by
            # a second indexed access.
            existing = existing_workspaces.get(source_id)
            if existing is None:
                continue

            if source_workspace.workspace_name != existing.name:
                ids_to_update.add(source_id)

        return ids_to_update

    def _create_or_update_panther_workspaces(
        self,
        workspace_ids_to_create: set[str],
        workspace_ids_to_update: set[str],
        child_to_parent_map: dict[str, str],
        workspace_id_to_wdf_map: dict[str, dict[str, list[str]]],
    ) -> None:
        """Creates or updates the selected workspaces and applies their WDFs.

        Source workspaces that are in neither of the two ID sets are skipped.
        API failures are logged and the workspace is skipped entirely —
        including its WDF settings — so one failure does not abort the run.
        """
        action: Literal["CREATE", "UPDATE"]

        for source_workspace in self.source_group_full:
            if source_workspace.workspace_id in workspace_ids_to_update:
                action = "UPDATE"
            elif source_workspace.workspace_id in workspace_ids_to_create:
                action = "CREATE"
            else:
                continue

            context: WorkspaceContext = WorkspaceContext(
                workspace_id=source_workspace.workspace_id,
                workspace_name=source_workspace.workspace_name,
                wdf_id=source_workspace.workspace_data_filter_id,
                wdf_values=source_workspace.workspace_data_filter_values,
            )

            parent_workspace_id: str = child_to_parent_map[context.workspace_id]

            try:
                self._api.create_or_update_panther_workspace(
                    workspace_id=context.workspace_id,
                    workspace_name=str(context.workspace_name),
                    parent_id=parent_workspace_id,
                )
            except GoodDataApiException as e:
                combined_context = {**context.__dict__, **e.__dict__}
                self.logger.error(
                    f"Failed to {action.lower()} workspace: {context.workspace_id}. "
                    + f"Error: {e} Context: {combined_context}"
                )
                # Do not touch the WDF settings of a workspace that could not
                # be created or updated — check_wdf_settings would raise when
                # querying the missing/unchanged workspace.
                continue

            self.logger.info(
                f"{action.title()}d workspace: {context.workspace_id}"
            )

            # If the child workspace has WDF settings, apply them.
            child_wdfs: dict[str, list[str]] = workspace_id_to_wdf_map.get(
                context.workspace_id, {}
            )
            if child_wdfs:
                self.wdf_manager.check_wdf_settings(
                    context,
                )

    def delete_panther_workspaces(
        self, ids_to_delete: set[str], workspace_id_to_name_map: dict[str, str]
    ) -> None:
        """Deletes the given workspaces from Panther, logging any failures."""
        for workspace_id in ids_to_delete:
            workspace_context: WorkspaceContext = WorkspaceContext(
                workspace_id=workspace_id,
                workspace_name=workspace_id_to_name_map.get(workspace_id),
            )
            try:
                self._api.delete_panther_workspace(workspace_id)
                self.logger.info(
                    f"Deleted workspace: {workspace_context.workspace_id}"
                )

            except GoodDataApiException as e:
                exception_context = {**workspace_context.__dict__, **e.__dict__}
                self.logger.error(
                    f"Failed to delete workspace: {workspace_context.workspace_id}. "
                    + f"Error: {e} Context: {exception_context}"
                )

    def verify_workspace_provisioning(
        self,
        source_group: list[WorkspaceFullLoad],
        parent_workspace_ids: set[str],
    ) -> None:
        """Verifies that upstream content is equal to the source data.

        Raises:
            WorkspaceException: If the (id, name) pairs in source and Panther
                differ in either direction.
        """
        source_ids_names: set[tuple[str, str]] = {
            (item.workspace_id, item.workspace_name) for item in source_group
        }

        panther_workspaces: list[CatalogWorkspace] = (
            self._api.get_panther_children_workspaces(parent_workspace_ids)
        )

        # NOTE(review): _find_workspaces_to_update reads `.id` on
        # CatalogWorkspace — confirm `.workspace_id` resolves to the same value.
        panther_ids_names: set[tuple[str, str]] = {
            (workspace.workspace_id, workspace.name)
            for workspace in panther_workspaces
        }

        diff: set[tuple[str, str]] = source_ids_names.symmetric_difference(
            panther_ids_names
        )

        if diff:
            raise WorkspaceException(
                "Provisioning failed. The source and Panther workspaces do not "
                + f"match. Difference: {diff}"
            )

    def _provision_full_load(self) -> None:
        """Full load workspace provisioning."""

        # Validate the source data.
        self.validator.validate_source_data(self.source_group_full)

        # Set the maps based on the source data.
        self.maps = self.parser.set_maps_based_on_source(
            self.maps, self.source_group_full
        )

        # Get upstream children of all parent workspaces.
        self.upstream_group: list[CatalogWorkspace] = (
            self._api.get_panther_children_workspaces(self.maps.parent_ids)
        )

        # Set maps that require upstream data.
        self.maps = self.parser.set_maps_with_upstream_data(
            self.maps, self.source_group_full, self.upstream_group
        )

        # Create an instance of WDF manager with the created maps.
        self.wdf_manager = WorkspaceDataFilterManager(self._api, self.maps)

        # Sort the ids to groups based on provisioning logic.
        id_groups = self._create_groups(
            self.maps.source_ids, self.maps.upstream_ids
        )

        # Find out which workspaces to update.
        self.ids_to_update: set[str] = self._find_workspaces_to_update(
            self.source_group_full,
            self.upstream_group,
            id_groups.ids_in_both_systems,
        )

        # Delete the workspaces that are not in the source.
        self.delete_panther_workspaces(
            id_groups.ids_to_delete, self.maps.workspace_id_to_name_map
        )

        # Create or update selected workspaces.
        self._create_or_update_panther_workspaces(
            id_groups.ids_to_create,
            self.ids_to_update,
            self.maps.child_to_parent_id_map,
            self.maps.workspace_id_to_wdf_map,
        )

        # Check WDF settings of ignored workspaces.
        ignored_workspace_ids: set[str] = self.maps.source_ids.difference(
            id_groups.ids_to_create.union(self.ids_to_update).union(
                id_groups.ids_to_delete
            )
        )

        for ignored_workspace_id in ignored_workspace_ids:
            ignored_workspace_context: WorkspaceContext = WorkspaceContext(
                workspace_id=ignored_workspace_id,
                workspace_name=self.maps.workspace_id_to_name_map.get(
                    ignored_workspace_id
                ),
            )
            self.wdf_manager.check_wdf_settings(ignored_workspace_context)

        # Verify the provisioning by queries to GoodData Cloud.
        self.verify_workspace_provisioning(
            self.source_group_full, self.maps.parent_ids
        )

    def _provision_incremental_load(self) -> None:
        """Incremental workspace provisioning."""

        raise NotImplementedError("Not implemented yet.")
@@ -0,0 +1,286 @@
1
+ # (C) 2025 GoodData Corporation
2
+
3
+ """Module for managing workspace data filter settings in GoodData Cloud."""
4
+
5
import json
import os
from typing import Any
from uuid import uuid4

from requests import Response

from gooddata_pipelines.api import GoodDataApi
from gooddata_pipelines.logger.logger import LogObserver
from gooddata_pipelines.provisioning.entities.workspaces.models import (
    WDFSetting,
    WorkspaceDataMaps,
)
from gooddata_pipelines.provisioning.utils.context_objects import (
    WorkspaceContext,
)
from gooddata_pipelines.provisioning.utils.exceptions import WorkspaceException
21
+
22
+
23
class WorkspaceDataFilterManager:
    """
    Helper class to manage workspace data filter settings. Note that Workspace
    Data Filters themselves are not managed here. The Workspace Data Filter
    Setting object represents the relationship of values in a WDF column and
    a specific workspace.
    """

    def __init__(self, api: GoodDataApi, maps: WorkspaceDataMaps) -> None:
        self.api: GoodDataApi = api
        self.maps: WorkspaceDataMaps = maps
        self.logger: LogObserver = LogObserver()

    @staticmethod
    def _create_wdf_setting_dict(
        wdf_setting_id: str, wdf_id: str, wdf_values: list[str]
    ) -> dict[str, Any]:
        """Loads a JSON template of a WDF setting and fills it with the given values."""
        values = [str(value) for value in wdf_values]

        # Template lives in the package's provisioning/assets directory.
        wdf_setting_path = os.path.join(
            os.path.dirname(__file__), "../../assets/wdf_setting.json"
        )
        with open(os.path.abspath(wdf_setting_path)) as file:
            wdf_setting: dict[str, Any] = json.load(file)

        wdf_setting["data"]["attributes"]["filterValues"] = values
        wdf_setting["data"]["id"] = wdf_setting_id
        wdf_setting["data"]["relationships"]["workspaceDataFilter"]["data"][
            "id"
        ] = wdf_id

        return wdf_setting

    def _get_wdf_settings_for_workspace(
        self, workspace_id: str
    ) -> list[WDFSetting]:
        """Gets all workspace data filter settings for a given workspace.

        Raises:
            WorkspaceException: If the API request fails.
        """
        wdf_settings_response: Response = (
            self.api.get_workspace_data_filter_settings(workspace_id)
        )

        if not wdf_settings_response.ok:
            raise WorkspaceException(
                f"Failed to get WDF settings: {wdf_settings_response.text}",
                workspace_id=workspace_id,
                http_status=str(wdf_settings_response.status_code),
            )

        raw_wdf_settings: dict[str, Any] = wdf_settings_response.json()

        data: list[dict[str, Any]] = raw_wdf_settings["data"]

        wdf_settings: list[WDFSetting] = [
            WDFSetting(**wdf_setting) for wdf_setting in data
        ]

        return wdf_settings

    @staticmethod
    def _get_actual_wdf_setting_id_and_values(
        actual_wdf_settings: list[WDFSetting], actual_wdf_id: str
    ) -> tuple[str, list[str]]:
        """Returns WDF setting ID and values for given WDF ID.

        Raises:
            WorkspaceException: If no setting references the given WDF ID.
        """
        for actual_wdf_setting in actual_wdf_settings:
            if (
                actual_wdf_setting.relationships.workspaceDataFilter["data"].id
                == actual_wdf_id
            ):
                actual_wdf_setting_id = actual_wdf_setting.id
                actual_wdf_values = actual_wdf_setting.attributes.filterValues

                return actual_wdf_setting_id, actual_wdf_values

        raise WorkspaceException(
            "Could not find WDF setting for WDF in actual WDF settings.",
            wdf_id=actual_wdf_id,
        )

    def _delete_redundant_wdf_setting(
        self,
        workspace_context: WorkspaceContext,
        actual_wdf_id: str,
        actual_wdf_settings: list[WDFSetting],
    ) -> None:
        """Deletes a WDF setting that is not defined in the source."""
        actual_wdf_setting_id, actual_wdf_values = (
            self._get_actual_wdf_setting_id_and_values(
                actual_wdf_settings, actual_wdf_id
            )
        )
        # Update context with actual values
        workspace_context.wdf_id = actual_wdf_id
        workspace_context.wdf_values = actual_wdf_values

        # If there is a WDF setting for a WDF that should not be associated with
        # the workspace, then delete the setting
        delete_response: Response = (
            self.api.delete_workspace_data_filter_setting(
                workspace_context.workspace_id,
                actual_wdf_setting_id,
            )
        )
        if delete_response.ok:
            self.logger.info(
                f"Deleted WDF setting for WDF {workspace_context.wdf_id} in "
                + f"workspace {workspace_context.workspace_id}"
            )
        else:
            raise WorkspaceException(
                f"Failed to delete WDF setting: {delete_response.text}",
                delete_response,
                workspace_context,
            )

    def _post_wdf_setting(
        self,
        workspace_context: WorkspaceContext,
    ) -> None:
        """Posts a WDF setting to Panther."""
        wdf_setting = self._create_wdf_setting_dict(
            str(uuid4()),
            str(workspace_context.wdf_id),
            workspace_context.wdf_values
            if workspace_context.wdf_values
            else [],
        )
        post_response: Response = self.api.post_workspace_data_filter_setting(
            workspace_context.workspace_id,
            wdf_setting,
        )
        if post_response.ok:
            self.logger.info(
                f"Created WDF setting for WDF {workspace_context.wdf_id} in workspace {workspace_context.workspace_id}"
            )
        else:
            raise WorkspaceException(
                f"Failed to create WDF setting: {post_response.text}",
                post_response,
                workspace_context,
            )

    def _put_wdf_setting(
        self,
        workspace_context: WorkspaceContext,
        actual_wdf_settings: list[WDFSetting],
    ) -> None:
        """Overwrites an existing WDF setting in Panther with source values."""
        # get Panther WDF setting ID
        actual_wdf_setting_id, _ = self._get_actual_wdf_setting_id_and_values(
            actual_wdf_settings, str(workspace_context.wdf_id)
        )

        wdf_setting = self._create_wdf_setting_dict(
            actual_wdf_setting_id,
            str(workspace_context.wdf_id),
            workspace_context.wdf_values
            if workspace_context.wdf_values
            else [],
        )

        put_response: Response = self.api.put_workspace_data_filter_setting(
            workspace_context.workspace_id,
            wdf_setting,
        )
        if put_response.ok:
            self.logger.info(
                f"Updated WDF setting for WDF {workspace_context.wdf_id} in workspace {workspace_context.workspace_id}"
            )
        else:
            raise WorkspaceException(
                f"Failed to update WDF setting: {put_response.text}",
                put_response,
                workspace_context,
            )

    def _compare_wdf_settings(
        self,
        workspace_context: WorkspaceContext,
        source_wdf_config: dict[str, list[str]],
        upstream_wdf_settings: list[WDFSetting],
    ) -> None:
        """
        Compares WDF settings as extracted from the source with the actual WDF
        settings in Panther. We do not know the WDF setting IDs from the outset,
        which is why we need to check the WDF IDs and then the settings values
        in a roundabout way. I.e., we know that a WDF should have some setting
        with an unknown ID, but certain values. In this case, we don't care about
        the setting ID, but need to make sure that the workspace has the correct
        values for the WDF.
        """
        upstream_wdf_ids: set[str] = {
            upstream_wdf_setting.relationships.workspaceDataFilter["data"].id
            for upstream_wdf_setting in upstream_wdf_settings
        }

        source_wdf_ids: set[str] = set(source_wdf_config.keys())

        # Create map of upstream WDF_ID : WDF values
        upstream_wdf_ids_and_values: dict[str, list[str]] = {}
        for upstream_wdf_setting in upstream_wdf_settings:
            upstream_wdf_ids_and_values[
                upstream_wdf_setting.relationships.workspaceDataFilter[
                    "data"
                ].id
            ] = upstream_wdf_setting.attributes.filterValues

        # Iterate through source WDF settings
        for source_wdf_id in source_wdf_ids:
            # Update WDF information in context to make sure we have the correct
            # data -> there can be multiple WDFs per workspace
            source_values: list[str] = source_wdf_config[source_wdf_id]
            workspace_context.wdf_id = source_wdf_id
            workspace_context.wdf_values = source_values

            # Post WDF setting if missing for a WDF, when it should be there
            if source_wdf_id not in upstream_wdf_ids:
                self._post_wdf_setting(workspace_context)

            # If settings exist for a WDF that should be there, then compare values
            elif source_wdf_id in upstream_wdf_ids:
                actual_values: list[str] = upstream_wdf_ids_and_values[
                    source_wdf_id
                ]

                # If values are different, then update the WDF settings
                if set(source_values) != set(actual_values):
                    self._put_wdf_setting(
                        workspace_context,
                        upstream_wdf_settings,
                    )

        # Go through Panther WDF settings and check if there are any that should
        # not be there. Delete them if so.
        for actual_wdf_id in upstream_wdf_ids:
            if actual_wdf_id not in source_wdf_ids:
                self._delete_redundant_wdf_setting(
                    workspace_context,
                    actual_wdf_id,
                    upstream_wdf_settings,
                )

    def check_wdf_settings(
        self,
        workspace_context: WorkspaceContext,
    ) -> None:
        """
        Checks WDF settings for a given workspace.
        Creates WDF settings defined in source if they are missing in Panther.
        Updates WDF setting values if they are different in source and Panther.
        Deletes WDF settings from Panther if they are not defined in source.
        """
        actual_wdf_settings: list[WDFSetting] = (
            self._get_wdf_settings_for_workspace(workspace_context.workspace_id)
        )

        # The map only contains workspaces that define at least one WDF, but
        # callers invoke this check for every workspace — default to an empty
        # config instead of raising KeyError. An empty config still deletes
        # any redundant upstream settings.
        source_wdf_config: dict[str, list[str]] = (
            self.maps.workspace_id_to_wdf_map.get(
                workspace_context.workspace_id, {}
            )
        )

        self._compare_wdf_settings(
            workspace_context, source_wdf_config, actual_wdf_settings
        )
@@ -0,0 +1,123 @@
1
+ # (C) 2025 GoodData Corporation
2
+
3
+ """Module for parsing and processing workspace data in GoodData Cloud."""
4
+
5
+ from gooddata_sdk.catalog.workspace.entity_model.workspace import (
6
+ CatalogWorkspace,
7
+ )
8
+
9
+ from gooddata_pipelines.provisioning.entities.workspaces.models import (
10
+ WorkspaceDataMaps,
11
+ WorkspaceFullLoad,
12
+ )
13
+
14
+
15
class WorkspaceDataParser:
    """Helper class to process workspace data retrieved from Panther and source DB."""

    @staticmethod
    def _get_id_to_name_map(
        source_group: list[WorkspaceFullLoad],
        upstream_group: list[CatalogWorkspace],
    ) -> dict[str, str]:
        """Creates a map of workspace IDs to their names for all known workspaces.

        Source names take precedence over upstream names for shared IDs.
        """
        source_map: dict[str, str] = {
            workspace.workspace_id: workspace.workspace_name
            for workspace in source_group
        }
        upstream_map: dict[str, str] = {
            item.workspace_id: item.name for item in upstream_group
        }

        return {**upstream_map, **source_map}

    @staticmethod
    def _get_child_to_parent_map(
        source_group: list[WorkspaceFullLoad],
    ) -> dict[str, str]:
        """Creates a map of child workspace IDs to their parent workspace IDs."""
        child_to_parent_map: dict[str, str] = {
            workspace.workspace_id: workspace.parent_id
            for workspace in source_group
        }

        return child_to_parent_map

    @staticmethod
    def _get_set_of_ids_from_source(
        source_group: list[WorkspaceFullLoad], column_name: str
    ) -> set[str]:
        """Creates a set of unique, non-empty values of the given column.

        Used e.g. for "parent_id" and "workspace_id"; falsy values (None,
        empty string) are skipped.
        """
        set_of_ids: set[str] = {
            getattr(workspace, column_name)
            for workspace in source_group
            if getattr(workspace, column_name)
        }
        return set_of_ids

    @staticmethod
    def get_set_of_upstream_workspace_ids(
        upstream_group: list[CatalogWorkspace],
    ) -> set[str]:
        """Creates a set of unique upstream workspace IDs."""
        set_of_ids: set[str] = {item.workspace_id for item in upstream_group}
        return set_of_ids

    def _get_child_to_wdfs_map(
        self, source_group: list[WorkspaceFullLoad]
    ) -> dict[str, dict[str, list[str]]]:
        """Creates a map of child workspace IDs to their WDF IDs and values.

        Rows missing either the WDF ID or the WDF values are skipped, so
        workspaces without WDF configuration do not appear in the map at all.
        """
        # TODO: Use objects or a more transparent data structure instead of this.
        child_to_wdf_map: dict[str, dict[str, list[str]]] = {}

        # For each child, get its possible WDF IDs and values for each id
        for workspace in source_group:
            child_id: str = workspace.workspace_id
            wdf_id: str | None = workspace.workspace_data_filter_id
            wdf_values: list[str] | None = (
                workspace.workspace_data_filter_values
            )

            if wdf_values and wdf_id:
                child_to_wdf_map.setdefault(child_id, {})[wdf_id] = wdf_values

        return child_to_wdf_map

    def set_maps_based_on_source(
        self,
        map_object: WorkspaceDataMaps,
        source_group: list[WorkspaceFullLoad],
    ) -> WorkspaceDataMaps:
        """Creates maps which are dependent on the source group only."""
        map_object.child_to_parent_id_map = self._get_child_to_parent_map(
            source_group
        )
        map_object.workspace_id_to_wdf_map = self._get_child_to_wdfs_map(
            source_group
        )
        map_object.parent_ids = self._get_set_of_ids_from_source(
            source_group, "parent_id"
        )
        map_object.source_ids = self._get_set_of_ids_from_source(
            source_group, "workspace_id"
        )

        return map_object

    def set_maps_with_upstream_data(
        self,
        map_object: WorkspaceDataMaps,
        source_group: list[WorkspaceFullLoad],
        upstream_group: list[CatalogWorkspace],
    ) -> WorkspaceDataMaps:
        """Creates maps which are dependent on both the source group and upstream group."""
        map_object.workspace_id_to_name_map = self._get_id_to_name_map(
            source_group, upstream_group
        )
        map_object.upstream_ids = self.get_set_of_upstream_workspace_ids(
            upstream_group
        )

        return map_object