gooddata-pipelines 1.47.1.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of gooddata-pipelines might be problematic.

Files changed (54)
  1. gooddata_pipelines/__init__.py +59 -0
  2. gooddata_pipelines/_version.py +7 -0
  3. gooddata_pipelines/api/__init__.py +5 -0
  4. gooddata_pipelines/api/exceptions.py +41 -0
  5. gooddata_pipelines/api/gooddata_api.py +309 -0
  6. gooddata_pipelines/api/gooddata_api_wrapper.py +36 -0
  7. gooddata_pipelines/api/gooddata_sdk.py +374 -0
  8. gooddata_pipelines/api/utils.py +43 -0
  9. gooddata_pipelines/backup_and_restore/__init__.py +1 -0
  10. gooddata_pipelines/backup_and_restore/backup_input_processor.py +195 -0
  11. gooddata_pipelines/backup_and_restore/backup_manager.py +430 -0
  12. gooddata_pipelines/backup_and_restore/constants.py +42 -0
  13. gooddata_pipelines/backup_and_restore/csv_reader.py +41 -0
  14. gooddata_pipelines/backup_and_restore/models/__init__.py +1 -0
  15. gooddata_pipelines/backup_and_restore/models/input_type.py +11 -0
  16. gooddata_pipelines/backup_and_restore/models/storage.py +58 -0
  17. gooddata_pipelines/backup_and_restore/models/workspace_response.py +51 -0
  18. gooddata_pipelines/backup_and_restore/storage/__init__.py +1 -0
  19. gooddata_pipelines/backup_and_restore/storage/base_storage.py +18 -0
  20. gooddata_pipelines/backup_and_restore/storage/local_storage.py +37 -0
  21. gooddata_pipelines/backup_and_restore/storage/s3_storage.py +71 -0
  22. gooddata_pipelines/logger/__init__.py +8 -0
  23. gooddata_pipelines/logger/logger.py +115 -0
  24. gooddata_pipelines/provisioning/__init__.py +31 -0
  25. gooddata_pipelines/provisioning/assets/wdf_setting.json +14 -0
  26. gooddata_pipelines/provisioning/entities/__init__.py +1 -0
  27. gooddata_pipelines/provisioning/entities/user_data_filters/__init__.py +1 -0
  28. gooddata_pipelines/provisioning/entities/user_data_filters/models/__init__.py +1 -0
  29. gooddata_pipelines/provisioning/entities/user_data_filters/models/udf_models.py +32 -0
  30. gooddata_pipelines/provisioning/entities/user_data_filters/user_data_filters.py +221 -0
  31. gooddata_pipelines/provisioning/entities/users/__init__.py +1 -0
  32. gooddata_pipelines/provisioning/entities/users/models/__init__.py +1 -0
  33. gooddata_pipelines/provisioning/entities/users/models/permissions.py +242 -0
  34. gooddata_pipelines/provisioning/entities/users/models/user_groups.py +64 -0
  35. gooddata_pipelines/provisioning/entities/users/models/users.py +114 -0
  36. gooddata_pipelines/provisioning/entities/users/permissions.py +153 -0
  37. gooddata_pipelines/provisioning/entities/users/user_groups.py +212 -0
  38. gooddata_pipelines/provisioning/entities/users/users.py +179 -0
  39. gooddata_pipelines/provisioning/entities/workspaces/__init__.py +1 -0
  40. gooddata_pipelines/provisioning/entities/workspaces/models.py +78 -0
  41. gooddata_pipelines/provisioning/entities/workspaces/workspace.py +263 -0
  42. gooddata_pipelines/provisioning/entities/workspaces/workspace_data_filters.py +286 -0
  43. gooddata_pipelines/provisioning/entities/workspaces/workspace_data_parser.py +123 -0
  44. gooddata_pipelines/provisioning/entities/workspaces/workspace_data_validator.py +188 -0
  45. gooddata_pipelines/provisioning/provisioning.py +132 -0
  46. gooddata_pipelines/provisioning/utils/__init__.py +1 -0
  47. gooddata_pipelines/provisioning/utils/context_objects.py +32 -0
  48. gooddata_pipelines/provisioning/utils/exceptions.py +95 -0
  49. gooddata_pipelines/provisioning/utils/utils.py +80 -0
  50. gooddata_pipelines/py.typed +0 -0
  51. gooddata_pipelines-1.47.1.dev1.dist-info/METADATA +85 -0
  52. gooddata_pipelines-1.47.1.dev1.dist-info/RECORD +54 -0
  53. gooddata_pipelines-1.47.1.dev1.dist-info/WHEEL +4 -0
  54. gooddata_pipelines-1.47.1.dev1.dist-info/licenses/LICENSE.txt +1 -277
@@ -0,0 +1,188 @@
+ # (C) 2025 GoodData Corporation
+
+ """Module for validating workspace data integrity in GoodData Cloud."""
+
+ from typing import Any
+
+ from requests import Response
+
+ from gooddata_pipelines.api import GoodDataApi
+ from gooddata_pipelines.logger.logger import LogObserver
+ from gooddata_pipelines.provisioning.entities.workspaces.models import (
+     WorkspaceFullLoad,
+ )
+ from gooddata_pipelines.provisioning.utils.context_objects import (
+     WorkspaceContext,
+ )
+ from gooddata_pipelines.provisioning.utils.exceptions import (
+     WorkspaceDataIntegrityException,
+     WorkspaceException,
+ )
+
+
+ class WorkspaceDataValidator:
+     """Class for validating workspace data integrity before provisioning."""
+
+     def __init__(self, api: GoodDataApi):
+         """
+         Initializes the WorkspaceDataValidator with the GoodData API instance.
+
+         Args:
+             api (GoodDataApi): An instance of the GoodData API client.
+         """
+         self.api = api
+         self.logger = LogObserver()
+
+     def _check_basic_integrity(
+         self,
+         source_group: list[WorkspaceFullLoad],
+     ) -> tuple[set[str], dict[str, list[str]]]:
+         """
+         Checks that mandatory fields are not empty and that the combinations
+         of values are unique.
+
+         Returns a set of parent workspaces and a dictionary of parent-wdf mappings.
+         """
+         parent_workspaces: set[str] = set()
+         parent_wdf_map: dict[str, list[str]] = {}
+         parent_child_wdf_ids: list[tuple[str, str, str | None]] = []
+
+         # Check that fields are not empty
+         for workspace in source_group:
+             parent_id: str | None = workspace.parent_id
+             workspace_id: str | None = workspace.workspace_id
+             workspace_name: str | None = workspace.workspace_name
+             wdf_id: str | None = workspace.workspace_data_filter_id
+             wdf_values: list[str] | None = (
+                 workspace.workspace_data_filter_values
+             )
+
+             # Create a context for the workspace validation
+             validation_context: WorkspaceContext = WorkspaceContext(
+                 workspace_id=workspace_id,
+                 workspace_name=workspace_name,
+                 wdf_id=wdf_id,
+                 wdf_values=wdf_values,
+             )
+
+             # Raise a specific error if neither parent_id nor workspace_id is defined
+             if (parent_id is None or parent_id == "") and (
+                 workspace_id is None or workspace_id == ""
+             ):
+                 raise WorkspaceDataIntegrityException(
+                     "Parent ID and workspace ID are not defined for at least one row. Please check the source data."
+                 )
+
+             # Raise error if parent_id is not defined
+             if parent_id is None or parent_id == "":
+                 raise WorkspaceDataIntegrityException(
+                     "Parent ID is not defined in source data.",
+                     validation_context,
+                 )
+
+             # Add parent_id to the set of unique parent workspaces
+             parent_workspaces.add(parent_id)
+
+             # Raise error if workspace_id is not defined
+             if workspace_id is None or workspace_id == "":
+                 raise WorkspaceDataIntegrityException(
+                     f"Workspace ID is not defined for parent {parent_id}"
+                 )
+
+             # Raise error if wdf_id is defined but no values are provided
+             if wdf_id is not None and wdf_id != "":
+                 if wdf_values is None or wdf_values == []:
+                     raise WorkspaceDataIntegrityException(
+                         "WDF ID is defined but no WDF values are provided",
+                         validation_context,
+                     )
+
+                 # Add wdf_id to the parent-wdf dict if the value is defined
+                 if not parent_wdf_map.get(parent_id):
+                     parent_wdf_map[parent_id] = []
+
+                 parent_wdf_map[parent_id].append(wdf_id)
+
+             # Raise error if wdf_values are defined but wdf_id is not defined
+             if wdf_values is not None and wdf_values != []:
+                 if wdf_id is None or wdf_id == "":
+                     raise WorkspaceDataIntegrityException(
+                         "WDF values are provided but WDF ID is not defined.",
+                         validation_context,
+                     )
+
+             parent_child_wdf_ids.append((parent_id, workspace_id, wdf_id))
+
+         # Check whether there are non-unique combinations in the data
+         if len(parent_child_wdf_ids) != len(set(parent_child_wdf_ids)):
+             # Log the error to the database as a warning, but continue execution
+             self.logger.warning(
+                 "Duplicate combinations of parent_id, workspace_id, "
+                 + "wdf_id exist in the source data."
+             )
+
+         return parent_workspaces, parent_wdf_map
+
+     def _check_parent_exist(self, parent_id: str) -> None:
+         """
+         Raises an error if a parent workspace does not exist in Panther.
+         """
+         if not self.api.check_workspace_exists(parent_id):
+             raise WorkspaceException(
+                 f"Parent workspace {parent_id} does not exist in Panther.",
+                 workspace_id=parent_id,
+             )
+
+     def _check_wdf_is_set_on_parent(
+         self, parent_id: str, source_wdf_ids: list[str]
+     ) -> None:
+         """Raises an error if the parent workspace does not contain any of the defined wdf_id."""
+         wdf_response: Response = self.api.get_all_workspace_data_filters(
+             parent_id
+         )
+         wdf_json: dict[str, Any] = wdf_response.json()
+         wdf_data: list[dict[str, Any]] = wdf_json.get("data", [])
+         wdf_ids_on_parent: set[str] = {wdf["id"] for wdf in wdf_data}
+
+         for source_wdf_id in source_wdf_ids:
+             if source_wdf_id not in wdf_ids_on_parent:
+                 raise WorkspaceException(
+                     f"WDF is not set on parent workspace {parent_id}.",
+                     wdf_id=source_wdf_id,
+                     workspace_id=parent_id,
+                 )
+
+     def validate_source_data(
+         self, source_group: list[WorkspaceFullLoad]
+     ) -> None:
+         """
+         Validates the source data integrity.
+
+         **Raises an error when**:
+         - the list of workspaces is empty
+         - parent_id is not defined
+         - workspace_id is not defined
+         - the parent workspace does not exist
+         - the parent workspace does not contain a defined wdf_id
+         - wdf_id is defined but wdf_values is not defined
+         - wdf_values are defined but wdf_id is not defined
+
+         **Logs a warning when**:
+         - there are multiple rows with the same parent_id, workspace_id, wdf_id combination.
+         """
+         if not source_group:
+             # Raise error if source is empty
+             raise WorkspaceException(
+                 "No workspaces found in the source database."
+             )
+
+         unique_parents, parent_wdf_map = self._check_basic_integrity(
+             source_group
+         )
+
+         for parent_id in unique_parents:
+             self._check_parent_exist(parent_id)
+
+             self._check_wdf_is_set_on_parent(
+                 parent_id, parent_wdf_map[parent_id]
+             )
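A minimal sketch of how the validator above could be exercised on its own. It assumes `GoodDataApi` is constructed from a host and token (as it is in the `Provisioning` base class below) and that `WorkspaceFullLoad` accepts the field names read in `_check_basic_integrity` as keyword arguments; the keyword construction is an assumption, not confirmed by this diff:

```python
# Sketch only: exercises WorkspaceDataValidator outside the provisioner.
# GoodDataApi(host, token) mirrors its use in the Provisioning base class;
# constructing WorkspaceFullLoad from keyword arguments is an assumption.
import os

from gooddata_pipelines.api import GoodDataApi
from gooddata_pipelines.provisioning.entities.workspaces.models import (
    WorkspaceFullLoad,
)
from gooddata_pipelines.provisioning.entities.workspaces.workspace_data_validator import (
    WorkspaceDataValidator,
)
from gooddata_pipelines.provisioning.utils.exceptions import WorkspaceException

api = GoodDataApi(os.environ["GDC_HOSTNAME"], os.environ["GDC_AUTH_TOKEN"])
validator = WorkspaceDataValidator(api)

# One source row: a child workspace under an existing parent, with one WDF setting.
rows = [
    WorkspaceFullLoad(
        parent_id="parent_workspace",
        workspace_id="child_workspace",
        workspace_name="Child Workspace",
        workspace_data_filter_id="region_filter",
        workspace_data_filter_values=["us-east"],
    )
]

try:
    validator.validate_source_data(rows)
except WorkspaceException as error:
    # Integrity and existence problems surface as WorkspaceException (or a subclass).
    print(f"Validation failed: {error.error_message}")
```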
@@ -0,0 +1,132 @@
+ # (C) 2025 GoodData Corporation
+
+ """Provisioning base class for GoodData Pipelines."""
+
+ from pathlib import Path
+ from typing import Generic, Type, TypeVar
+
+ from gooddata_sdk.utils import PROFILES_FILE_PATH, profile_content
+
+ from gooddata_pipelines.api import GoodDataApi
+ from gooddata_pipelines.logger.logger import (
+     LogObserver,
+ )
+ from gooddata_pipelines.provisioning.utils.utils import EntityGroupIds
+
+ TFullLoadSourceData = TypeVar("TFullLoadSourceData")
+ TIncrementalSourceData = TypeVar("TIncrementalSourceData")
+
+
+ class Provisioning(Generic[TFullLoadSourceData, TIncrementalSourceData]):
+     """Base provisioning class."""
+
+     TProvisioning = TypeVar("TProvisioning", bound="Provisioning")
+     source_group_full: list[TFullLoadSourceData]
+     source_group_incremental: list[TIncrementalSourceData]
+
+     def __init__(self, host: str, token: str) -> None:
+         self.source_id: set[str] = set()
+         self.upstream_id: set[str] = set()
+         self._api = GoodDataApi(host, token)
+         self.logger: LogObserver = LogObserver()
+         self.fatal_exception: str = ""
+
+     @classmethod
+     def create(
+         cls: Type[TProvisioning], host: str, token: str
+     ) -> TProvisioning:
+         """Creates a provisioner instance using the provided host and token."""
+         return cls(host=host, token=token)
+
+     @classmethod
+     def create_from_profile(
+         cls: Type[TProvisioning],
+         profile: str = "default",
+         profiles_path: Path = PROFILES_FILE_PATH,
+     ) -> TProvisioning:
+         """Creates a provisioner instance using a GoodData profile file."""
+         content = profile_content(profile, profiles_path)
+         return cls(**content)
+
+     @staticmethod
+     def _create_groups(
+         source_id: set[str], panther_id: set[str]
+     ) -> EntityGroupIds:
+         """Creates groups for provisioning as sets of IDs.
+
+         Sorts the IDs into three categories:
+         - IDs that exist both in source and upstream (to be checked further)
+         - IDs that exist upstream but not in source (to be deleted)
+         - IDs that exist in source but not upstream (to be created)
+         """
+         ids_in_both_systems: set[str] = source_id.intersection(panther_id)
+         ids_to_delete: set[str] = panther_id.difference(source_id)
+         ids_to_create: set[str] = source_id.difference(panther_id)
+
+         return EntityGroupIds(
+             ids_in_both_systems=ids_in_both_systems,
+             ids_to_delete=ids_to_delete,
+             ids_to_create=ids_to_create,
+         )
+
+     def _provision_incremental_load(self) -> None:
+         raise NotImplementedError(
+             "Provisioning method to be implemented in the subclass."
+         )
+
+     def _provision_full_load(self) -> None:
+         raise NotImplementedError(
+             "Provisioning method to be implemented in the subclass."
+         )
+
+     def full_load(self, source_data: list[TFullLoadSourceData]) -> None:
+         """Runs the full provisioning workflow with the provided source data.
+
+         Full provisioning is a full load of the source data, where the source data
+         is assumed to be a single source of truth and the upstream workspaces are
+         updated to match it.
+
+         That means:
+         - All workspaces declared in the source data are created if missing, or
+           updated to match the source data
+         - All workspaces not declared in the source data are deleted
+         """
+         self.source_group_full = source_data
+
+         try:
+             self._provision_full_load()
+             self.logger.info("Provisioning completed successfully.")
+         except Exception as e:
+             self.fatal_exception = str(e)
+             self.logger.error(
+                 f"Provisioning failed. Error: {self.fatal_exception} "
+                 + f"Context: {e.__dict__}"
+             )
+
+     def incremental_load(
+         self, source_data: list[TIncrementalSourceData]
+     ) -> None:
+         """Runs the incremental provisioning workflow with the provided source data.
+
+         Incremental provisioning is used to modify a subset of the upstream workspaces
+         based on the source data provided.
+         """
+         self.source_group_incremental = source_data
+
+         try:
+             self._provision_incremental_load()
+             self.logger.info("Provisioning completed successfully.")
+         except Exception as e:
+             self.fatal_exception = str(e)
+             self.logger.error(
+                 f"Provisioning failed. Error: {self.fatal_exception} "
+                 + f"Context: {e.__dict__}"
+             )
+
+     # TODO: implement a second provisioning method and name the two differently:
+     # 1) provision_incremental - will use the is_active logic, such as user provisioning now
+     # 2) provision_full - full load of the source data, like workspaces now
+     # Each will have its own implementation and source data model.
+     # Both use cases are required and need to be supported.
+     # This will also improve the clarity of the code as now provisioning of each
+     # entity works differently, leading to confusion.
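The base class above leaves `_provision_full_load` and `_provision_incremental_load` to subclasses. A rough sketch of how a subclass might satisfy that contract and reuse `_create_groups`; the source model and the create/delete/lookup helpers are placeholders invented for illustration, only the hook names and `_create_groups` come from the package:

```python
# Sketch only: a hypothetical subclass of the Provisioning base class above.
from gooddata_pipelines.provisioning.provisioning import Provisioning
from gooddata_pipelines.provisioning.utils.utils import EntityGroupIds


class ExampleFullLoadItem:
    """Illustrative source row with just an identifier."""

    def __init__(self, entity_id: str) -> None:
        self.entity_id = entity_id


class ExampleProvisioner(Provisioning[ExampleFullLoadItem, ExampleFullLoadItem]):
    def _provision_full_load(self) -> None:
        # full_load() stores the input rows on self.source_group_full before calling this hook.
        source_ids = {item.entity_id for item in self.source_group_full}
        upstream_ids = self._get_upstream_ids()  # placeholder lookup

        groups: EntityGroupIds = self._create_groups(source_ids, upstream_ids)

        for entity_id in groups.ids_to_create:
            self._create_entity(entity_id)  # placeholder
        for entity_id in groups.ids_to_delete:
            self._delete_entity(entity_id)  # placeholder
        # groups.ids_in_both_systems would be diffed and updated here.

    # Placeholder helpers so the sketch is self-contained.
    def _get_upstream_ids(self) -> set[str]:
        return set()

    def _create_entity(self, entity_id: str) -> None:
        self.logger.info(f"Would create {entity_id}")

    def _delete_entity(self, entity_id: str) -> None:
        self.logger.info(f"Would delete {entity_id}")
```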
@@ -0,0 +1 @@
+ # (C) 2025 GoodData Corporation
@@ -0,0 +1,32 @@
+ # (C) 2025 GoodData Corporation
+
+ """Module for context objects used in GoodData Pipelines provisioning."""
+
+
+ class WorkspaceContext:
+     workspace_id: str
+     workspace_name: str | None
+     wdf_id: str | None
+     wdf_values: list[str] | None
+
+     def __init__(
+         self,
+         workspace_id: str | None,
+         workspace_name: str | None,
+         wdf_id: str | None = None,
+         wdf_values: list[str] | None = None,
+     ):
+         self.workspace_id: str = workspace_id if workspace_id else "NA"
+         self.workspace_name: str | None = workspace_name
+         self.wdf_id: str | None = wdf_id
+         self.wdf_values: list[str] | None = wdf_values
+
+
+ class UserContext:
+     user_id: str
+     user_groups: str
+
+     def __init__(self, user_id: str, user_groups: list[str]):
+         """User context object, stringifies list of user groups"""
+         self.user_id: str = user_id
+         self.user_groups: str = ",".join(user_groups)
@@ -0,0 +1,95 @@
+ # (C) 2025 GoodData Corporation
+
+ """Module for exceptions used in GoodData Pipelines provisioning."""
+
+ from gooddata_pipelines.provisioning.utils.utils import AttributesMixin
+
+
+ # TODO: Use the generic context exception and phase out the specific ones
+ # - we don't need to conform to process-specific schema anymore
+ class ContextException(Exception, AttributesMixin):
+     def __init__(
+         self, message: str, *context_objects: object, **kwargs: str
+     ) -> None:
+         """Exception raised during context processing."""
+         super().__init__(message)
+         attributes = self.get_attrs(*context_objects, overrides=kwargs)
+
+         for key, value in attributes.items():
+             setattr(self, key, value)
+
+
+ class ProvisioningException(Exception, AttributesMixin):
+     def __init__(
+         self, message: str, *context_objects: object, **kwargs: str
+     ) -> None:
+         """Exception raised during provisioning."""
+         super().__init__(message)
+         self.attributes = self.get_attrs(*context_objects, overrides=kwargs)
+         self.error_message: str = message
+
+
+ class WorkspaceException(ProvisioningException):
+     def __init__(
+         self,
+         message: str,
+         *context_objects: object,
+         **kwargs: str,
+     ) -> None:
+         """Exception raised during workspace provisioning."""
+         super().__init__(message, *context_objects, **kwargs)
+
+         self.http_status: str = self.attributes.get(
+             "http_status", "500 Internal Server Error"
+         )
+         self.http_method: str | None = self.attributes.get("http_method", "NA")
+         self.workspace_id: str = self.attributes.get("workspace_id", "NA")
+         self.workspace_name: str | None = self.attributes.get(
+             "workspace_name", "NA"
+         )
+         self.wdf_id: str | None = self.attributes.get("wdf_id", None)
+         self.wdf_values: str | None = self.attributes.get("wdf_values", None)
+         self.api_endpoint: str = self.attributes.get(
+             "api_endpoint", "workspace_provisioning"
+         )
+
+
+ class WorkspaceDataIntegrityException(WorkspaceException):
+     def __init__(
+         self, message: str, *context_objects: object, **kwargs: str
+     ) -> None:
+         """Exception raised during workspace validation."""
+         super().__init__(message, *context_objects, **kwargs)
+
+         self.workspace_id: str = self.attributes.get("workspace_id", "NA")
+         self.workspace_name: str | None = self.attributes.get(
+             "workspace_name", None
+         )
+         self.wdf_id: str | None = self.attributes.get("wdf_id", None)
+         self.wdf_values: str | None = self.attributes.get("wdf_values", None)
+         self.api_endpoint: str = self.attributes.get(
+             "api_endpoint", "workspace_data_validation"
+         )
+
+
+ class BaseUserException(ProvisioningException):
+     def __init__(
+         self, message: str, *context_objects: object, **kwargs: str
+     ) -> None:
+         """Exception raised during user provisioning."""
+         super().__init__(message, *context_objects, **kwargs)
+
+         self.http_status: str = self.attributes.get(
+             "http_status", "500 Internal Server Error"
+         )
+         self.http_method: str | None = self.attributes.get("http_method", None)
+         self.workspace_id: str | None = self.attributes.get(
+             "workspace_id", None
+         )
+         self.user_id: str | None = self.attributes.get("user_id", None)
+         self.user_group_id: str | None = self.attributes.get(
+             "user_group_id", None
+         )
+         self.api_endpoint: str = self.attributes.get(
+             "api_endpoint", "user_provisioning"
+         )
@@ -0,0 +1,80 @@
+ # (C) 2025 GoodData Corporation
+
+ """Module for utilities used in GoodData Pipelines provisioning."""
+
+ from pydantic import BaseModel
+ from requests import Response
+
+
+ class AttributesMixin:
+     """
+     Mixin class to provide a method for getting attributes of an object which may or may not exist.
+     """
+
+     @staticmethod
+     def get_attrs(
+         *objects: object, overrides: dict[str, str] | None = None
+     ) -> dict[str, str]:
+         """
+         Returns a dictionary of attributes from the given objects.
+
+         Args:
+             objects: The objects to get the attributes from. Special handling is implemented for
+                 requests.Response; the __dict__ attribute is used for general objects.
+             overrides: A dictionary of attributes to override the object's attributes.
+         Returns:
+             dict: Returns a dictionary of the objects' attributes.
+         """
+         # TODO: This might not work great with nested objects, values which are lists of objects etc.
+         # If we care about parsing the logs back from the string, we should consider some other approach
+         attrs: dict[str, str] = {}
+         for context_object in objects:
+             if isinstance(context_object, Response):
+                 # for requests.Response objects, keys need to be renamed to match the log schema
+                 attrs.update(
+                     {
+                         "http_status": str(context_object.status_code),
+                         "http_method": getattr(
+                             context_object.request, "method", "NA"
+                         ),
+                         "api_endpoint": getattr(
+                             context_object.request, "url", "NA"
+                         ),
+                     }
+                 )
+             else:
+                 # Generic handling for other objects
+                 for key, value in context_object.__dict__.items():
+                     if value is None:
+                         continue
+
+                     if isinstance(value, list):
+                         attrs[key] = ", ".join(
+                             str(list_item) for list_item in value
+                         )
+                     else:
+                         attrs[key] = str(value)
+
+         if overrides:
+             attrs.update(overrides)
+
+         return attrs
+
+
+ class SplitMixin:
+     @staticmethod
+     def split(string_value: str, delimiter: str = ",") -> list[str]:
+         """
+         Splits a string by the given delimiter and returns a list of stripped values.
+         If the input is empty, returns an empty list.
+         """
+         if not string_value:
+             return []
+
+         return [value.strip() for value in string_value.split(delimiter)]
+
+
+ class EntityGroupIds(BaseModel):
+     ids_in_both_systems: set[str]
+     ids_to_delete: set[str]
+     ids_to_create: set[str]
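For reference, a small self-contained illustration of what the two mixins above return for plain context objects; the toy class is invented for the example:

```python
# Sketch only: demonstrates AttributesMixin.get_attrs and SplitMixin.split
# with a made-up context object (no requests.Response involved).
from gooddata_pipelines.provisioning.utils.utils import AttributesMixin, SplitMixin


class _ToyContext:
    def __init__(self) -> None:
        self.workspace_id = "child_workspace"
        self.wdf_values = ["us-east", "us-west"]  # lists are joined with ", "
        self.workspace_name = None  # None values are skipped


attrs = AttributesMixin.get_attrs(_ToyContext(), overrides={"api_endpoint": "demo"})
# {'workspace_id': 'child_workspace', 'wdf_values': 'us-east, us-west', 'api_endpoint': 'demo'}
print(attrs)

print(SplitMixin.split(" a, b ,c "))  # ['a', 'b', 'c']
print(SplitMixin.split(""))           # []
```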
File without changes
@@ -0,0 +1,85 @@
+ Metadata-Version: 2.4
+ Name: gooddata-pipelines
+ Version: 1.47.1.dev1
+ Author-email: GoodData <support@gooddata.com>
+ License: MIT
+ License-File: LICENSE.txt
+ Requires-Python: >=3.10
+ Requires-Dist: boto3-stubs<2.0.0,>=1.39.3
+ Requires-Dist: boto3<2.0.0,>=1.39.3
+ Requires-Dist: gooddata-sdk~=1.47.1.dev1
+ Requires-Dist: pydantic<3.0.0,>=2.11.3
+ Requires-Dist: requests<3.0.0,>=2.32.3
+ Requires-Dist: types-pyyaml<7.0.0,>=6.0.12.20250326
+ Requires-Dist: types-requests<3.0.0,>=2.32.0
+ Provides-Extra: dev
+ Requires-Dist: moto<6.0.0,>=5.1.6; extra == 'dev'
+ Requires-Dist: mypy<2.0.0,>=1.16.0; extra == 'dev'
+ Requires-Dist: pytest-mock<4.0.0,>=3.14.0; extra == 'dev'
+ Requires-Dist: pytest<9.0.0,>=8.3.5; extra == 'dev'
+ Requires-Dist: ruff<0.12.0,>=0.11.2; extra == 'dev'
+ Description-Content-Type: text/markdown
+
+ # GoodData Pipelines
+
+ A high-level library for automating the lifecycle of GoodData Cloud (GDC).
+
+ You can use the package to manage the following resources in GDC:
+
+ 1. Provisioning (create, update, delete)
+    - User profiles
+    - User Groups
+    - User/Group permissions
+    - User Data Filters
+    - Child workspaces (incl. Workspace Data Filter settings)
+ 1. _[PLANNED]:_ Backup and restore of workspaces
+ 1. _[PLANNED]:_ Custom fields management
+    - extend the Logical Data Model of a child workspace
+
+ In case you are not interested in incorporating a library into your own program, but would like to use a ready-made script, consider having a look at [GoodData Productivity Tools](https://github.com/gooddata/gooddata-productivity-tools).
+
+ ## Provisioning
+
+ The entities can be managed in either a _full load_ or an _incremental_ way.
+
+ Full load means that the input data should represent the full and complete desired state of GDC after the script has finished. For example, you would include the specification of all child workspaces you want to exist in GDC in the input data for workspace provisioning. Any workspaces present in GDC and not defined in the source data (i.e., your input) will be deleted.
+
+ On the other hand, the incremental load treats the source data as instructions for a specific change, e.g., the creation or deletion of a specific workspace. You can specify which workspaces you want to delete or create, while the rest of the workspaces already present in GDC remain as they are, ignored by the provisioning script.
+
+ The provisioning module exposes _Provisioner_ classes reflecting the different entities. Typical usage involves importing the Provisioner class and the input data model matching that class and the planned provisioning method:
+
+ ```python
+ import logging
+ import os
+ from csv import DictReader
+ from pathlib import Path
+
+ # Import the entity Provisioner class and corresponding model from the gooddata_pipelines library
+ from gooddata_pipelines import UserFullLoad, UserProvisioner
+
+ # Optional: you can set up logging and subscribe it to the Provisioner
+ from utils.logger import setup_logging
+
+ setup_logging()
+ logger = logging.getLogger(__name__)
+
+ # Create the Provisioner instance - you can also create the instance from a GDC yaml profile
+ provisioner = UserProvisioner(
+     host=os.environ["GDC_HOSTNAME"], token=os.environ["GDC_AUTH_TOKEN"]
+ )
+
+ # Optional: subscribe to logs
+ provisioner.logger.subscribe(logger)
+
+ # Load your data from your data source
+ source_data_path: Path = Path("path/to/some.csv")
+ source_data_reader = DictReader(source_data_path.read_text().splitlines())
+ source_data = [row for row in source_data_reader]
+
+ # Validate your input data with the full load model
+ full_load_data: list[UserFullLoad] = UserFullLoad.from_list_of_dicts(
+     source_data
+ )
+ provisioner.full_load(full_load_data)
+ ```
+
+ Ready-made scripts covering the basic use cases can be found in the [GoodData Productivity Tools](https://github.com/gooddata/gooddata-productivity-tools) repository.
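The incremental workflow follows the same shape as the full-load example above. A sketch under the assumption that an incremental input model (named `UserIncrementalLoad` here, not confirmed by this diff) exposes a `from_list_of_dicts` constructor analogous to the full-load model; `create_from_profile` and `incremental_load` come from the provisioning base class:

```python
# Sketch only: incremental provisioning, mirroring the full-load README example.
# The UserIncrementalLoad model name and its from_list_of_dicts constructor are
# assumptions; create_from_profile and incremental_load exist on the base class.
from csv import DictReader
from pathlib import Path

from gooddata_pipelines import UserIncrementalLoad, UserProvisioner

# Create the provisioner from the default GoodData profile file
provisioner = UserProvisioner.create_from_profile(profile="default")

source_data = list(DictReader(Path("path/to/changes.csv").read_text().splitlines()))

# Each row describes a single change (e.g. a user to add or deactivate)
incremental_data: list[UserIncrementalLoad] = UserIncrementalLoad.from_list_of_dicts(
    source_data
)
provisioner.incremental_load(incremental_data)
```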
@@ -0,0 +1,54 @@
+ gooddata_pipelines/__init__.py,sha256=H1W1_ZuQyoFiM305bALurYv2S3dKc_9I8sFCjcH9XLo,1826
+ gooddata_pipelines/_version.py,sha256=Zi8Ht5ofjFeSYGG5USixQtJNB1po6okh0Rez8VyAsFM,200
+ gooddata_pipelines/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ gooddata_pipelines/api/__init__.py,sha256=0WaBI2XMdkkZgnUsQ9kqipNzh2l2zamZvUt_qjp8xCk,106
+ gooddata_pipelines/api/exceptions.py,sha256=rddQXfv8Ktckz7RONKBnKfm53M7dzPCh50Dl1k-8hqs,1545
+ gooddata_pipelines/api/gooddata_api.py,sha256=QFTwn5o7xMEvXAg85qqr9VhkpA1UDQElfUAUbvMYS8o,10881
+ gooddata_pipelines/api/gooddata_api_wrapper.py,sha256=t7dFrXJ6X4yXS9XDthOmvd2CyzdnDDNPeIngTEW72YU,1152
+ gooddata_pipelines/api/gooddata_sdk.py,sha256=wd5O4e9BQLWUawt6odrs5a51nqFGthBkvqh9WOiW36Q,13734
+ gooddata_pipelines/api/utils.py,sha256=3QY_aYH17I9THoCINE3l-n5oj52k-gNeT1wv6Z_VxN8,1433
+ gooddata_pipelines/backup_and_restore/__init__.py,sha256=-BG28PGDbalLyZGQjpFG0pjdIvtf25ut0r8ZwZVbi4s,32
+ gooddata_pipelines/backup_and_restore/backup_input_processor.py,sha256=1-Sh0n4DrBrQ7M3Hzq_iiNe-2cxQx0KDu4QdDok618I,7667
+ gooddata_pipelines/backup_and_restore/backup_manager.py,sha256=NS3dKc6C7VVfuFQxG0opQitcc23GwCntbLirAhWQei0,15441
+ gooddata_pipelines/backup_and_restore/constants.py,sha256=AO4H6ngsLMs4bCV-RcT7xIi-VZu3smgZWA_3P7lVsQc,907
+ gooddata_pipelines/backup_and_restore/csv_reader.py,sha256=0Kw7mJT7REj3Gjqfsc6YT9MbhcqfCGNB_SKBwzTI1rk,1268
+ gooddata_pipelines/backup_and_restore/models/__init__.py,sha256=-BG28PGDbalLyZGQjpFG0pjdIvtf25ut0r8ZwZVbi4s,32
+ gooddata_pipelines/backup_and_restore/models/input_type.py,sha256=CBKJigKdmZ-NJD9MSfNhq89bo86W0AqCMMoyonbd1QA,239
+ gooddata_pipelines/backup_and_restore/models/storage.py,sha256=hIIj5CETEroaMTqezzj0ze0eMpPQrzoj5pyp_HBC-xk,1390
+ gooddata_pipelines/backup_and_restore/models/workspace_response.py,sha256=eQbYLgRQc17IRG0yPTAJVrD-Xs05SzuwtzoNrPT2DoY,833
+ gooddata_pipelines/backup_and_restore/storage/__init__.py,sha256=-BG28PGDbalLyZGQjpFG0pjdIvtf25ut0r8ZwZVbi4s,32
+ gooddata_pipelines/backup_and_restore/storage/base_storage.py,sha256=67wdItlG3neExeb_eCUDQhswdUB62X5Nyj9sOImB_Hg,487
+ gooddata_pipelines/backup_and_restore/storage/local_storage.py,sha256=NvhPRzRAvuSpc5qCDyPqZaMB0i1jeZOZczaSwjUSGEg,1155
+ gooddata_pipelines/backup_and_restore/storage/s3_storage.py,sha256=STA0J-xaP7QQefyYK3OncqwcvWfrtJRRB--5PBfEKDU,2608
+ gooddata_pipelines/logger/__init__.py,sha256=W-fJvMStnsDUY52AYFhx_LnS2cSCFNf3bB47Iew2j04,129
+ gooddata_pipelines/logger/logger.py,sha256=yIMdvqsmOSGQLI4U_tQwxX5E2q_FXUu0Ko7Hv39slFM,3549
+ gooddata_pipelines/provisioning/__init__.py,sha256=RZDEiv8nla4Jwa2TZXUdp1NSxg2_-lLqz4h7k2c4v5Y,854
+ gooddata_pipelines/provisioning/provisioning.py,sha256=ZVD-Jz_MyLDy7f1D62oJ58sHfW03_LNO7Bguuv4C4xA,5042
+ gooddata_pipelines/provisioning/assets/wdf_setting.json,sha256=nxOLGZkEQiMdARcUDER5ygqr3Zu-MQlLlUyXVhPUq64,280
+ gooddata_pipelines/provisioning/entities/__init__.py,sha256=-BG28PGDbalLyZGQjpFG0pjdIvtf25ut0r8ZwZVbi4s,32
+ gooddata_pipelines/provisioning/entities/user_data_filters/__init__.py,sha256=-BG28PGDbalLyZGQjpFG0pjdIvtf25ut0r8ZwZVbi4s,32
+ gooddata_pipelines/provisioning/entities/user_data_filters/user_data_filters.py,sha256=4Q3tbpTU4FPpCVE1t402mP3lJMrLFTDX4AgWG_pIIg0,8321
+ gooddata_pipelines/provisioning/entities/user_data_filters/models/__init__.py,sha256=-BG28PGDbalLyZGQjpFG0pjdIvtf25ut0r8ZwZVbi4s,32
+ gooddata_pipelines/provisioning/entities/user_data_filters/models/udf_models.py,sha256=y0q5E91AhxIkf_EHW0swCjNUkiiAOFXarAhvjUKVVKw,740
+ gooddata_pipelines/provisioning/entities/users/__init__.py,sha256=-BG28PGDbalLyZGQjpFG0pjdIvtf25ut0r8ZwZVbi4s,32
+ gooddata_pipelines/provisioning/entities/users/permissions.py,sha256=05RptbWJ5L9eZ12R7orG3-wF-w69IwlRzHitMzGokiY,5781
+ gooddata_pipelines/provisioning/entities/users/user_groups.py,sha256=Up36pwwlOFS_IdYetViZ7gUHfV2hIgXL4th_k9D31Eo,8266
+ gooddata_pipelines/provisioning/entities/users/users.py,sha256=1B1bMk8ysughCoCJs1aX0bI9iUIeAc1hIUyJ0hWyC5M,6503
+ gooddata_pipelines/provisioning/entities/users/models/__init__.py,sha256=-BG28PGDbalLyZGQjpFG0pjdIvtf25ut0r8ZwZVbi4s,32
+ gooddata_pipelines/provisioning/entities/users/models/permissions.py,sha256=Hx-8ac8n5xOyysJJCXu2XpOXL1IqH_w9LKck2qseWBs,8377
+ gooddata_pipelines/provisioning/entities/users/models/user_groups.py,sha256=TjlP6oABK6UP7nMKNMlLk3M62eNf9e-3LdKI9-VFwi8,2007
+ gooddata_pipelines/provisioning/entities/users/models/users.py,sha256=rKtiRxtelLphw-_BbD-AM_-hPrpp0xqEr1jmuU_oJVg,3767
+ gooddata_pipelines/provisioning/entities/workspaces/__init__.py,sha256=-BG28PGDbalLyZGQjpFG0pjdIvtf25ut0r8ZwZVbi4s,32
+ gooddata_pipelines/provisioning/entities/workspaces/models.py,sha256=4SR4XLd-qJ4pgpb-diHbm5yqZRmXQ_PgvF4iFfSV740,2155
+ gooddata_pipelines/provisioning/entities/workspaces/workspace.py,sha256=qlBwzP9nWTucoB9tLMY35na1fIhifvnZoN5D-PJhudY,9770
+ gooddata_pipelines/provisioning/entities/workspaces/workspace_data_filters.py,sha256=0dNcK7tkp40XulCj7EPoB4zVeyQbRx2Tt4yAfgLrm50,10736
+ gooddata_pipelines/provisioning/entities/workspaces/workspace_data_parser.py,sha256=hjrLZQKdN8vSFjtX9VPiDYUO0RHj06bqzgNzEQqnfR8,4316
+ gooddata_pipelines/provisioning/entities/workspaces/workspace_data_validator.py,sha256=t6RWNsrDpebyOgB4c_ctqrkio72jBHqsXqk-ntBTkA4,7225
+ gooddata_pipelines/provisioning/utils/__init__.py,sha256=-BG28PGDbalLyZGQjpFG0pjdIvtf25ut0r8ZwZVbi4s,32
+ gooddata_pipelines/provisioning/utils/context_objects.py,sha256=sM22hMsxE0XLI1TU0Vs-2kK0vf4YrB1musoAg__4bjc,936
+ gooddata_pipelines/provisioning/utils/exceptions.py,sha256=1WnAOlPhqOf0xRcvn70lxAlLb8Oo6m6WCYS4hj9uzDU,3630
+ gooddata_pipelines/provisioning/utils/utils.py,sha256=_Tk-mFgbIGpCixDCF9e-3ZYd-g5Jb3SJiLSH465k4jY,2846
+ gooddata_pipelines-1.47.1.dev1.dist-info/METADATA,sha256=03kqiteoy2ls0dwnru9_wBg0CfdOBgewyO2w1U2fWQs,3773
+ gooddata_pipelines-1.47.1.dev1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ gooddata_pipelines-1.47.1.dev1.dist-info/licenses/LICENSE.txt,sha256=PNC7WXGIo6OKkNoPLRxlVrw6jaLcjSTUsSxy9Xcu9Jo,560365
+ gooddata_pipelines-1.47.1.dev1.dist-info/RECORD,,