gooddata-pipelines 1.47.1.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of gooddata-pipelines might be problematic. Click here for more details.
- gooddata_pipelines/__init__.py +59 -0
- gooddata_pipelines/_version.py +7 -0
- gooddata_pipelines/api/__init__.py +5 -0
- gooddata_pipelines/api/exceptions.py +41 -0
- gooddata_pipelines/api/gooddata_api.py +309 -0
- gooddata_pipelines/api/gooddata_api_wrapper.py +36 -0
- gooddata_pipelines/api/gooddata_sdk.py +374 -0
- gooddata_pipelines/api/utils.py +43 -0
- gooddata_pipelines/backup_and_restore/__init__.py +1 -0
- gooddata_pipelines/backup_and_restore/backup_input_processor.py +195 -0
- gooddata_pipelines/backup_and_restore/backup_manager.py +430 -0
- gooddata_pipelines/backup_and_restore/constants.py +42 -0
- gooddata_pipelines/backup_and_restore/csv_reader.py +41 -0
- gooddata_pipelines/backup_and_restore/models/__init__.py +1 -0
- gooddata_pipelines/backup_and_restore/models/input_type.py +11 -0
- gooddata_pipelines/backup_and_restore/models/storage.py +58 -0
- gooddata_pipelines/backup_and_restore/models/workspace_response.py +51 -0
- gooddata_pipelines/backup_and_restore/storage/__init__.py +1 -0
- gooddata_pipelines/backup_and_restore/storage/base_storage.py +18 -0
- gooddata_pipelines/backup_and_restore/storage/local_storage.py +37 -0
- gooddata_pipelines/backup_and_restore/storage/s3_storage.py +71 -0
- gooddata_pipelines/logger/__init__.py +8 -0
- gooddata_pipelines/logger/logger.py +115 -0
- gooddata_pipelines/provisioning/__init__.py +31 -0
- gooddata_pipelines/provisioning/assets/wdf_setting.json +14 -0
- gooddata_pipelines/provisioning/entities/__init__.py +1 -0
- gooddata_pipelines/provisioning/entities/user_data_filters/__init__.py +1 -0
- gooddata_pipelines/provisioning/entities/user_data_filters/models/__init__.py +1 -0
- gooddata_pipelines/provisioning/entities/user_data_filters/models/udf_models.py +32 -0
- gooddata_pipelines/provisioning/entities/user_data_filters/user_data_filters.py +221 -0
- gooddata_pipelines/provisioning/entities/users/__init__.py +1 -0
- gooddata_pipelines/provisioning/entities/users/models/__init__.py +1 -0
- gooddata_pipelines/provisioning/entities/users/models/permissions.py +242 -0
- gooddata_pipelines/provisioning/entities/users/models/user_groups.py +64 -0
- gooddata_pipelines/provisioning/entities/users/models/users.py +114 -0
- gooddata_pipelines/provisioning/entities/users/permissions.py +153 -0
- gooddata_pipelines/provisioning/entities/users/user_groups.py +212 -0
- gooddata_pipelines/provisioning/entities/users/users.py +179 -0
- gooddata_pipelines/provisioning/entities/workspaces/__init__.py +1 -0
- gooddata_pipelines/provisioning/entities/workspaces/models.py +78 -0
- gooddata_pipelines/provisioning/entities/workspaces/workspace.py +263 -0
- gooddata_pipelines/provisioning/entities/workspaces/workspace_data_filters.py +286 -0
- gooddata_pipelines/provisioning/entities/workspaces/workspace_data_parser.py +123 -0
- gooddata_pipelines/provisioning/entities/workspaces/workspace_data_validator.py +188 -0
- gooddata_pipelines/provisioning/provisioning.py +132 -0
- gooddata_pipelines/provisioning/utils/__init__.py +1 -0
- gooddata_pipelines/provisioning/utils/context_objects.py +32 -0
- gooddata_pipelines/provisioning/utils/exceptions.py +95 -0
- gooddata_pipelines/provisioning/utils/utils.py +80 -0
- gooddata_pipelines/py.typed +0 -0
- gooddata_pipelines-1.47.1.dev1.dist-info/METADATA +85 -0
- gooddata_pipelines-1.47.1.dev1.dist-info/RECORD +54 -0
- gooddata_pipelines-1.47.1.dev1.dist-info/WHEEL +4 -0
- gooddata_pipelines-1.47.1.dev1.dist-info/licenses/LICENSE.txt +1 -277
|
@@ -0,0 +1,263 @@
|
|
|
1
|
+
# (C) 2025 GoodData Corporation
|
|
2
|
+
"""Module for provisioning workspaces in GoodData Cloud."""
|
|
3
|
+
|
|
4
|
+
from typing import Literal
|
|
5
|
+
|
|
6
|
+
from gooddata_sdk.catalog.workspace.entity_model.workspace import (
|
|
7
|
+
CatalogWorkspace,
|
|
8
|
+
)
|
|
9
|
+
|
|
10
|
+
from gooddata_pipelines.api.exceptions import GoodDataApiException
|
|
11
|
+
from gooddata_pipelines.provisioning.entities.workspaces.models import (
|
|
12
|
+
WorkspaceDataMaps,
|
|
13
|
+
WorkspaceFullLoad,
|
|
14
|
+
WorkspaceIncrementalLoad,
|
|
15
|
+
)
|
|
16
|
+
from gooddata_pipelines.provisioning.entities.workspaces.workspace_data_filters import (
|
|
17
|
+
WorkspaceDataFilterManager,
|
|
18
|
+
)
|
|
19
|
+
from gooddata_pipelines.provisioning.entities.workspaces.workspace_data_parser import (
|
|
20
|
+
WorkspaceDataParser,
|
|
21
|
+
)
|
|
22
|
+
from gooddata_pipelines.provisioning.entities.workspaces.workspace_data_validator import (
|
|
23
|
+
WorkspaceDataValidator,
|
|
24
|
+
)
|
|
25
|
+
from gooddata_pipelines.provisioning.provisioning import Provisioning
|
|
26
|
+
from gooddata_pipelines.provisioning.utils.context_objects import (
|
|
27
|
+
WorkspaceContext,
|
|
28
|
+
)
|
|
29
|
+
from gooddata_pipelines.provisioning.utils.exceptions import WorkspaceException
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class WorkspaceProvisioner(
    Provisioning[WorkspaceFullLoad, WorkspaceIncrementalLoad]
):
    """Provisions child workspaces in GoodData Cloud (Panther) from source data.

    Full-load provisioning compares the source data against the upstream
    (Panther) state and creates, updates, or deletes child workspaces so the
    two match, then reconciles workspace data filter (WDF) settings.
    """

    # Source rows for the two load modes; populated by the Provisioning base.
    source_group_full: list[WorkspaceFullLoad]
    source_group_incremental: list[WorkspaceIncrementalLoad]

    def __init__(self, *args: str, **kwargs: str) -> None:
        """Creates an instance of the WorkspaceProvisioner.

        Calls the superclass constructor and initializes the validator, parser,
        and maps for workspace data.
        """
        super().__init__(*args, **kwargs)
        self.validator: WorkspaceDataValidator = WorkspaceDataValidator(
            self._api
        )
        self.parser: WorkspaceDataParser = WorkspaceDataParser()
        self.maps: WorkspaceDataMaps = WorkspaceDataMaps()

    def _find_workspaces_to_update(
        self,
        source_group: list[WorkspaceFullLoad],
        panther_group: list[CatalogWorkspace],
        ids_in_both_systems: set[str],
    ) -> set[str]:
        """Returns IDs of workspaces whose name differs between source and Panther.

        Inspects existing Panther workspaces and compares them to workspaces
        from the source database. If the ID exists in both systems but the
        workspace name in GoodData Cloud is different from the source, the
        workspace will be updated. The rest of the workspaces are ignored.
        """
        existing_workspaces: dict[str, CatalogWorkspace] = {
            workspace.id: workspace for workspace in panther_group
        }

        ids_to_update: set[str] = set()

        for source_workspace in source_group:
            source_id = source_workspace.workspace_id

            if source_id not in ids_in_both_systems:
                continue

            # Single dict lookup instead of a truthiness check followed by
            # re-indexing the same key.
            panther_workspace = existing_workspaces.get(source_id)
            if panther_workspace is None:
                continue

            if source_workspace.workspace_name != panther_workspace.name:
                ids_to_update.add(source_id)

        return ids_to_update

    def _create_or_update_panther_workspaces(
        self,
        workspace_ids_to_create: set[str],
        workspace_ids_to_update: set[str],
        child_to_parent_map: dict[str, str],
        workspace_id_to_wdf_map: dict[str, dict[str, list[str]]],
    ) -> None:
        """Creates or updates the selected workspaces and applies their WDF settings.

        API failures are logged (with the workspace and exception context
        merged) rather than raised, so one failing workspace does not abort
        the whole run.
        """
        action: Literal["CREATE", "UPDATE"]

        for source_workspace in self.source_group_full:
            if source_workspace.workspace_id in workspace_ids_to_update:
                action = "UPDATE"
            elif source_workspace.workspace_id in workspace_ids_to_create:
                action = "CREATE"
            else:
                # Workspace is neither created nor updated; skip it here.
                continue

            context: WorkspaceContext = WorkspaceContext(
                workspace_id=source_workspace.workspace_id,
                workspace_name=source_workspace.workspace_name,
                wdf_id=source_workspace.workspace_data_filter_id,
                wdf_values=source_workspace.workspace_data_filter_values,
            )

            parent_workspace_id: str = child_to_parent_map[context.workspace_id]

            try:
                self._api.create_or_update_panther_workspace(
                    workspace_id=context.workspace_id,
                    workspace_name=str(context.workspace_name),
                    parent_id=parent_workspace_id,
                )
                # "CREATE".title() + "d" -> "Created"; likewise "Updated".
                self.logger.info(
                    f"{action.title()}d workspace: {context.workspace_id}"
                )

            except GoodDataApiException as e:
                combined_context = {**context.__dict__, **e.__dict__}
                # Fix: lowercase the verb mid-sentence ("Failed to update ..."
                # instead of "Failed to Update ...").
                self.logger.error(
                    f"Failed to {action.lower()} workspace: {context.workspace_id}. "
                    + f"Error: {e} Context: {combined_context}"
                )

            # If child workspace has WDF settings, apply them
            child_wdfs: dict[str, list[str]] = workspace_id_to_wdf_map.get(
                context.workspace_id, {}
            )
            if child_wdfs:
                self.wdf_manager.check_wdf_settings(
                    context,
                )

    def delete_panther_workspaces(
        self, ids_to_delete: set[str], workspace_id_to_name_map: dict[str, str]
    ) -> None:
        """Deletes the given workspaces from Panther, logging failures per workspace."""
        for workspace_id in ids_to_delete:
            workspace_context: WorkspaceContext = WorkspaceContext(
                workspace_id=workspace_id,
                workspace_name=workspace_id_to_name_map.get(workspace_id),
            )
            try:
                self._api.delete_panther_workspace(workspace_id)
                self.logger.info(
                    f"Deleted workspace: {workspace_context.workspace_id}"
                )

            except GoodDataApiException as e:
                exception_context = {**workspace_context.__dict__, **e.__dict__}
                self.logger.error(
                    f"Failed to delete workspace: {workspace_context.workspace_id}. "
                    + f"Error: {e} Context: {exception_context}"
                )

    def verify_workspace_provisioning(
        self,
        source_group: list[WorkspaceFullLoad],
        parent_workspace_ids: set[str],
    ) -> None:
        """Verifies that upstream content is equal to the source data.

        Raises:
            WorkspaceException: If the (id, name) pairs in source and Panther
                do not match exactly.
        """
        source_ids_names: set[tuple[str, str]] = {
            (item.workspace_id, item.workspace_name) for item in source_group
        }

        panther_workspaces: list[CatalogWorkspace] = (
            self._api.get_panther_children_workspaces(parent_workspace_ids)
        )

        # NOTE(review): this reads `workspace.workspace_id` while
        # _find_workspaces_to_update reads `workspace.id` from the same
        # CatalogWorkspace type — confirm both attributes exist upstream.
        panther_ids_names: set[tuple[str, str]] = {
            (workspace.workspace_id, workspace.name)
            for workspace in panther_workspaces
        }

        diff: set[tuple[str, str]] = source_ids_names.symmetric_difference(
            panther_ids_names
        )

        if diff:
            raise WorkspaceException(
                "Provisioning failed. The source and Panther workspaces do not "
                + f"match. Difference: {diff}"
            )

    def _provision_full_load(self) -> None:
        """Full load workspace provisioning."""

        # Validate the source data.
        self.validator.validate_source_data(self.source_group_full)

        # Set the maps based on the source data.
        self.maps = self.parser.set_maps_based_on_source(
            self.maps, self.source_group_full
        )

        # Get upstream children of all parent workspaces.
        self.upstream_group: list[CatalogWorkspace] = (
            self._api.get_panther_children_workspaces(self.maps.parent_ids)
        )

        # Set maps that require upstream data.
        self.maps = self.parser.set_maps_with_upstream_data(
            self.maps, self.source_group_full, self.upstream_group
        )

        # Create an instance of WDF manager with the created maps.
        self.wdf_manager = WorkspaceDataFilterManager(self._api, self.maps)

        # Sort the ids to groups based on provisioning logic.
        id_groups = self._create_groups(
            self.maps.source_ids, self.maps.upstream_ids
        )

        # Find out which workspaces to update.
        self.ids_to_update: set[str] = self._find_workspaces_to_update(
            self.source_group_full,
            self.upstream_group,
            id_groups.ids_in_both_systems,
        )

        # Delete the workspaces that are not in the source.
        self.delete_panther_workspaces(
            id_groups.ids_to_delete, self.maps.workspace_id_to_name_map
        )

        # Create or update selected workspaces.
        self._create_or_update_panther_workspaces(
            id_groups.ids_to_create,
            self.ids_to_update,
            self.maps.child_to_parent_id_map,
            self.maps.workspace_id_to_wdf_map,
        )

        # Check WDF settings of ignored workspaces (present in both systems,
        # name unchanged): they were neither created, updated, nor deleted.
        ignored_workspace_ids: set[str] = self.maps.source_ids.difference(
            id_groups.ids_to_create.union(self.ids_to_update).union(
                id_groups.ids_to_delete
            )
        )

        for ignored_workspace_id in ignored_workspace_ids:
            ignored_workspace_context: WorkspaceContext = WorkspaceContext(
                workspace_id=ignored_workspace_id,
                workspace_name=self.maps.workspace_id_to_name_map.get(
                    ignored_workspace_id
                ),
            )
            self.wdf_manager.check_wdf_settings(ignored_workspace_context)

        # Verify the provisioning by queries to GoodData Cloud.
        self.verify_workspace_provisioning(
            self.source_group_full, self.maps.parent_ids
        )

    def _provision_incremental_load(self) -> None:
        """Incremental workspace provisioning."""

        raise NotImplementedError("Not implemented yet.")
|
@@ -0,0 +1,286 @@
|
|
|
1
|
+
# (C) 2025 GoodData Corporation
|
|
2
|
+
|
|
3
|
+
"""Module for managing workspace data filter settings in GoodData Cloud."""
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from typing import Any
|
|
7
|
+
from uuid import uuid4
|
|
8
|
+
|
|
9
|
+
from requests import Response
|
|
10
|
+
|
|
11
|
+
from gooddata_pipelines.api import GoodDataApi
|
|
12
|
+
from gooddata_pipelines.logger.logger import LogObserver
|
|
13
|
+
from gooddata_pipelines.provisioning.entities.workspaces.models import (
|
|
14
|
+
WDFSetting,
|
|
15
|
+
WorkspaceDataMaps,
|
|
16
|
+
)
|
|
17
|
+
from gooddata_pipelines.provisioning.utils.context_objects import (
|
|
18
|
+
WorkspaceContext,
|
|
19
|
+
)
|
|
20
|
+
from gooddata_pipelines.provisioning.utils.exceptions import WorkspaceException
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class WorkspaceDataFilterManager:
    """
    Helper class to manage workspace data filter settings. Note that Workspace
    Data Filters themselves are not managed here. The Workspace Data Filter
    Setting object represents the relationship of values in a WDF column and
    a specific workspace.
    """

    def __init__(self, api: GoodDataApi, maps: WorkspaceDataMaps) -> None:
        self.api: GoodDataApi = api
        self.maps: WorkspaceDataMaps = maps
        self.logger: LogObserver = LogObserver()

    @staticmethod
    def _create_wdf_setting_dict(
        wdf_setting_id: str, wdf_id: str, wdf_values: list[str]
    ) -> dict[str, Any]:
        """Loads a JSON template of a WDF setting and fills it with the given values."""
        # Fix: hoisted the local `import os` from the middle of the function
        # body to the top of the method so the dependency is visible up front.
        import os

        values = [str(value) for value in wdf_values]

        # Template ships with the package, two directories up in `assets`.
        wdf_setting_path = os.path.join(
            os.path.dirname(__file__), "../../assets/wdf_setting.json"
        )
        with open(os.path.abspath(wdf_setting_path)) as file:
            wdf_setting: dict[str, Any] = json.load(file)

        wdf_setting["data"]["attributes"]["filterValues"] = values
        wdf_setting["data"]["id"] = wdf_setting_id
        wdf_setting["data"]["relationships"]["workspaceDataFilter"]["data"][
            "id"
        ] = wdf_id

        return wdf_setting

    def _get_wdf_settings_for_workspace(
        self, workspace_id: str
    ) -> list[WDFSetting]:
        """Gets all workspace data filter settings for a given workspace.

        Raises:
            WorkspaceException: If the API call does not return an OK status.
        """
        wdf_settings_response: Response = (
            self.api.get_workspace_data_filter_settings(workspace_id)
        )

        if not wdf_settings_response.ok:
            raise WorkspaceException(
                f"Failed to get WDF settings: {wdf_settings_response.text}",
                workspace_id=workspace_id,
                http_status=str(wdf_settings_response.status_code),
            )

        raw_wdf_settings: dict[str, Any] = wdf_settings_response.json()

        data: list[dict[str, Any]] = raw_wdf_settings["data"]

        wdf_settings: list[WDFSetting] = [
            WDFSetting(**wdf_setting) for wdf_setting in data
        ]

        return wdf_settings

    @staticmethod
    def _get_actual_wdf_setting_id_and_values(
        actual_wdf_settings: list[WDFSetting], actual_wdf_id: str
    ) -> tuple[str, list[str]]:
        """Returns WDF setting ID and values for given WDF ID.

        Raises:
            WorkspaceException: If no setting references the given WDF ID.
        """
        for actual_wdf_setting in actual_wdf_settings:
            if (
                actual_wdf_setting.relationships.workspaceDataFilter["data"].id
                == actual_wdf_id
            ):
                actual_wdf_setting_id = actual_wdf_setting.id
                actual_wdf_values = actual_wdf_setting.attributes.filterValues

                return actual_wdf_setting_id, actual_wdf_values

        raise WorkspaceException(
            "Could not find WDF setting for WDF in actual WDF settings.",
            wdf_id=actual_wdf_id,
        )

    def _delete_redundant_wdf_setting(
        self,
        workspace_context: WorkspaceContext,
        actual_wdf_id: str,
        actual_wdf_settings: list[WDFSetting],
    ) -> None:
        """Deletes a WDF setting that should not exist according to the source.

        Raises:
            WorkspaceException: If the delete API call fails.
        """
        actual_wdf_setting_id, actual_wdf_values = (
            self._get_actual_wdf_setting_id_and_values(
                actual_wdf_settings, actual_wdf_id
            )
        )
        # Update context with actual values
        workspace_context.wdf_id = actual_wdf_id
        workspace_context.wdf_values = actual_wdf_values

        # If there is a WDF setting for a WDF that should not be associated with
        # the workspace, then delete the setting
        delete_response: Response = (
            self.api.delete_workspace_data_filter_setting(
                workspace_context.workspace_id,
                actual_wdf_setting_id,
            )
        )
        if delete_response.ok:
            self.logger.info(
                f"Deleted WDF setting for WDF {workspace_context.wdf_id} in "
                + f"workspace {workspace_context.workspace_id}"
            )
        else:
            raise WorkspaceException(
                f"Failed to delete WDF setting: {delete_response.text}",
                delete_response,
                workspace_context,
            )

    def _post_wdf_setting(
        self,
        workspace_context: WorkspaceContext,
    ) -> None:
        """Posts a WDF setting to Panther.

        A fresh UUID is generated for the new setting ID; values default to an
        empty list when the context carries none.

        Raises:
            WorkspaceException: If the post API call fails.
        """
        wdf_setting = self._create_wdf_setting_dict(
            str(uuid4()),
            str(workspace_context.wdf_id),
            workspace_context.wdf_values
            if workspace_context.wdf_values
            else [],
        )
        post_response: Response = self.api.post_workspace_data_filter_setting(
            workspace_context.workspace_id,
            wdf_setting,
        )
        if post_response.ok:
            self.logger.info(
                f"Created WDF setting for WDF {workspace_context.wdf_id} in workspace {workspace_context.workspace_id}"
            )
        else:
            raise WorkspaceException(
                f"Failed to create WDF setting: {post_response.text}",
                post_response,
                workspace_context,
            )

    def _put_wdf_setting(
        self,
        workspace_context: WorkspaceContext,
        actual_wdf_settings: list[WDFSetting],
    ) -> None:
        """Overwrites an existing WDF setting in Panther with source values.

        Raises:
            WorkspaceException: If the put API call fails.
        """
        # get Panther WDF setting ID
        actual_wdf_setting_id, _ = self._get_actual_wdf_setting_id_and_values(
            actual_wdf_settings, str(workspace_context.wdf_id)
        )

        wdf_setting = self._create_wdf_setting_dict(
            actual_wdf_setting_id,
            str(workspace_context.wdf_id),
            workspace_context.wdf_values
            if workspace_context.wdf_values
            else [],
        )

        put_response: Response = self.api.put_workspace_data_filter_setting(
            workspace_context.workspace_id,
            wdf_setting,
        )
        if put_response.ok:
            self.logger.info(
                f"Updated WDF setting for WDF {workspace_context.wdf_id} in workspace {workspace_context.workspace_id}"
            )
        else:
            raise WorkspaceException(
                f"Failed to update WDF setting: {put_response.text}",
                put_response,
                workspace_context,
            )

    def _compare_wdf_settings(
        self,
        workspace_context: WorkspaceContext,
        source_wdf_config: dict[str, list[str]],
        upstream_wdf_settings: list[WDFSetting],
    ) -> None:
        """
        Compares WDF settings as extracted from the source with the actual WDF
        settings in Panther. We do not know the WDF setting IDs from the outset,
        which is why we need to check the WDF IDs and then the settings values
        in a roundabout way. I.e., we know that a WDF should have some setting
        with an unknown ID, but certain values. In this case, we don't care about
        the setting ID, but need to make sure that the workspace has the correct
        values for the WDF.
        """
        upstream_wdf_ids: set[str] = {
            upstream_wdf_setting.relationships.workspaceDataFilter["data"].id
            for upstream_wdf_setting in upstream_wdf_settings
        }

        source_wdf_ids: set[str] = set(source_wdf_config.keys())

        # Create map of upstream WDF_ID : WDF values
        upstream_wdf_ids_and_values: dict[str, list[str]] = {}
        for upstream_wdf_setting in upstream_wdf_settings:
            upstream_wdf_ids_and_values[
                upstream_wdf_setting.relationships.workspaceDataFilter[
                    "data"
                ].id
            ] = upstream_wdf_setting.attributes.filterValues

        # Iterate through source WDF settings
        for source_wdf_id in source_wdf_ids:
            # Update WDF information in context to make sure we have the correct
            # data -> there can be multiple WDFs per workspace
            source_values: list[str] = source_wdf_config[source_wdf_id]
            workspace_context.wdf_id = source_wdf_id
            workspace_context.wdf_values = source_values

            # Post WDF setting if missing for a WDF, when it should be there
            if source_wdf_id not in upstream_wdf_ids:
                self._post_wdf_setting(workspace_context)

            # If settings exist for a WDF that should be there, then compare
            # values. (`else` is exhaustive here — the redundant membership
            # re-test was dropped.)
            else:
                actual_values: list[str] = upstream_wdf_ids_and_values[
                    source_wdf_id
                ]

                # If values are different, then update the WDF settings
                if set(source_values) != set(actual_values):
                    self._put_wdf_setting(
                        workspace_context,
                        upstream_wdf_settings,
                    )

        # Go through Panther WDF settings and check if there are any that should
        # not be there. Delete them if so.
        for actual_wdf_id in upstream_wdf_ids:
            if actual_wdf_id not in source_wdf_ids:
                self._delete_redundant_wdf_setting(
                    workspace_context,
                    actual_wdf_id,
                    upstream_wdf_settings,
                )

    def check_wdf_settings(
        self,
        workspace_context: WorkspaceContext,
    ) -> None:
        """
        Checks WDF settings for a given workspace.
        Creates WDF settings defined in source if they are missing in Panther.
        Updates WDF setting values if they are different in source and Panther.
        Deletes WDF settings from Panther if they are not defined in source.
        """
        actual_wdf_settings: list[WDFSetting] = (
            self._get_wdf_settings_for_workspace(workspace_context.workspace_id)
        )

        # NOTE(review): direct indexing raises KeyError when the workspace has
        # no WDF config in source — confirm every caller guarantees an entry.
        source_wdf_config: dict[str, list[str]] = (
            self.maps.workspace_id_to_wdf_map[workspace_context.workspace_id]
        )

        self._compare_wdf_settings(
            workspace_context, source_wdf_config, actual_wdf_settings
        )
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
# (C) 2025 GoodData Corporation
|
|
2
|
+
|
|
3
|
+
"""Module for parsing and processing workspace data in GoodData Cloud."""
|
|
4
|
+
|
|
5
|
+
from gooddata_sdk.catalog.workspace.entity_model.workspace import (
|
|
6
|
+
CatalogWorkspace,
|
|
7
|
+
)
|
|
8
|
+
|
|
9
|
+
from gooddata_pipelines.provisioning.entities.workspaces.models import (
|
|
10
|
+
WorkspaceDataMaps,
|
|
11
|
+
WorkspaceFullLoad,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class WorkspaceDataParser:
    """Helper class to process workspace data retrieved from Panther and source DB."""

    @staticmethod
    def _get_id_to_name_map(
        source_group: list[WorkspaceFullLoad],
        upstream_group: list[CatalogWorkspace],
    ) -> dict[str, str]:
        """Creates a map of workspace IDs to their names for all known workspaces.

        When an ID exists in both groups, the source name wins (source keys
        are merged last).
        """
        source_map: dict[str, str] = {
            workspace.workspace_id: workspace.workspace_name
            for workspace in source_group
        }
        upstream_map: dict[str, str] = {
            item.workspace_id: item.name for item in upstream_group
        }

        return {**upstream_map, **source_map}

    @staticmethod
    def _get_child_to_parent_map(
        source_group: list[WorkspaceFullLoad],
    ) -> dict[str, str]:
        """Creates a map of child workspace IDs to their parent workspace IDs."""
        child_to_parent_map: dict[str, str] = {
            workspace.workspace_id: workspace.parent_id
            for workspace in source_group
        }

        return child_to_parent_map

    @staticmethod
    def _get_set_of_ids_from_source(
        source_group: list[WorkspaceFullLoad], column_name: str
    ) -> set[str]:
        """Creates a set of unique, truthy values of `column_name` from the source.

        Fix: the previous docstring claimed this collects parent workspace IDs,
        but the method is generic over any attribute name (it is also called
        with "workspace_id"). Falsy values (None, "") are filtered out.
        """
        set_of_ids: set[str] = {
            getattr(workspace, column_name)
            for workspace in source_group
            if getattr(workspace, column_name)
        }
        return set_of_ids

    @staticmethod
    def get_set_of_upstream_workspace_ids(
        upstream_group: list[CatalogWorkspace],
    ) -> set[str]:
        """Creates a set of unique upstream workspace IDs."""
        set_of_ids: set[str] = {item.workspace_id for item in upstream_group}
        return set_of_ids

    def _get_child_to_wdfs_map(
        self, source_group: list[WorkspaceFullLoad]
    ) -> dict[str, dict[str, list[str]]]:
        """Creates a map of child workspace IDs to their WDF IDs and values."""
        # TODO: Use objects or a more transparent data structure instead of this.
        child_to_wdf_map: dict[str, dict[str, list[str]]] = {}

        # For each child, get its possible WDF IDs and values for each id.
        # Rows with a missing WDF ID or missing values are skipped.
        for workspace in source_group:
            child_id: str = workspace.workspace_id
            wdf_id: str | None = workspace.workspace_data_filter_id
            wdf_values: list[str] | None = (
                workspace.workspace_data_filter_values
            )

            if wdf_values and wdf_id:
                # setdefault replaces the manual "check then create" dance.
                child_to_wdf_map.setdefault(child_id, {})[wdf_id] = wdf_values

        return child_to_wdf_map

    def set_maps_based_on_source(
        self,
        map_object: WorkspaceDataMaps,
        source_group: list[WorkspaceFullLoad],
    ) -> WorkspaceDataMaps:
        """Creates maps which are dependent on the source group only."""
        map_object.child_to_parent_id_map = self._get_child_to_parent_map(
            source_group
        )
        map_object.workspace_id_to_wdf_map = self._get_child_to_wdfs_map(
            source_group
        )
        map_object.parent_ids = self._get_set_of_ids_from_source(
            source_group, "parent_id"
        )
        map_object.source_ids = self._get_set_of_ids_from_source(
            source_group, "workspace_id"
        )

        return map_object

    def set_maps_with_upstream_data(
        self,
        map_object: WorkspaceDataMaps,
        source_group: list[WorkspaceFullLoad],
        upstream_group: list[CatalogWorkspace],
    ) -> WorkspaceDataMaps:
        """Creates maps which are dependent on both the source group and upstream group."""
        map_object.workspace_id_to_name_map = self._get_id_to_name_map(
            source_group, upstream_group
        )
        map_object.upstream_ids = self.get_set_of_upstream_workspace_ids(
            upstream_group
        )

        return map_object