gooddata-pipelines 1.47.1.dev1__py3-none-any.whl

This diff shows the content of publicly available package versions as released to their public registries, and is provided for informational purposes only.

Potentially problematic release.

This version of gooddata-pipelines might be problematic.

Files changed (54)
  1. gooddata_pipelines/__init__.py +59 -0
  2. gooddata_pipelines/_version.py +7 -0
  3. gooddata_pipelines/api/__init__.py +5 -0
  4. gooddata_pipelines/api/exceptions.py +41 -0
  5. gooddata_pipelines/api/gooddata_api.py +309 -0
  6. gooddata_pipelines/api/gooddata_api_wrapper.py +36 -0
  7. gooddata_pipelines/api/gooddata_sdk.py +374 -0
  8. gooddata_pipelines/api/utils.py +43 -0
  9. gooddata_pipelines/backup_and_restore/__init__.py +1 -0
  10. gooddata_pipelines/backup_and_restore/backup_input_processor.py +195 -0
  11. gooddata_pipelines/backup_and_restore/backup_manager.py +430 -0
  12. gooddata_pipelines/backup_and_restore/constants.py +42 -0
  13. gooddata_pipelines/backup_and_restore/csv_reader.py +41 -0
  14. gooddata_pipelines/backup_and_restore/models/__init__.py +1 -0
  15. gooddata_pipelines/backup_and_restore/models/input_type.py +11 -0
  16. gooddata_pipelines/backup_and_restore/models/storage.py +58 -0
  17. gooddata_pipelines/backup_and_restore/models/workspace_response.py +51 -0
  18. gooddata_pipelines/backup_and_restore/storage/__init__.py +1 -0
  19. gooddata_pipelines/backup_and_restore/storage/base_storage.py +18 -0
  20. gooddata_pipelines/backup_and_restore/storage/local_storage.py +37 -0
  21. gooddata_pipelines/backup_and_restore/storage/s3_storage.py +71 -0
  22. gooddata_pipelines/logger/__init__.py +8 -0
  23. gooddata_pipelines/logger/logger.py +115 -0
  24. gooddata_pipelines/provisioning/__init__.py +31 -0
  25. gooddata_pipelines/provisioning/assets/wdf_setting.json +14 -0
  26. gooddata_pipelines/provisioning/entities/__init__.py +1 -0
  27. gooddata_pipelines/provisioning/entities/user_data_filters/__init__.py +1 -0
  28. gooddata_pipelines/provisioning/entities/user_data_filters/models/__init__.py +1 -0
  29. gooddata_pipelines/provisioning/entities/user_data_filters/models/udf_models.py +32 -0
  30. gooddata_pipelines/provisioning/entities/user_data_filters/user_data_filters.py +221 -0
  31. gooddata_pipelines/provisioning/entities/users/__init__.py +1 -0
  32. gooddata_pipelines/provisioning/entities/users/models/__init__.py +1 -0
  33. gooddata_pipelines/provisioning/entities/users/models/permissions.py +242 -0
  34. gooddata_pipelines/provisioning/entities/users/models/user_groups.py +64 -0
  35. gooddata_pipelines/provisioning/entities/users/models/users.py +114 -0
  36. gooddata_pipelines/provisioning/entities/users/permissions.py +153 -0
  37. gooddata_pipelines/provisioning/entities/users/user_groups.py +212 -0
  38. gooddata_pipelines/provisioning/entities/users/users.py +179 -0
  39. gooddata_pipelines/provisioning/entities/workspaces/__init__.py +1 -0
  40. gooddata_pipelines/provisioning/entities/workspaces/models.py +78 -0
  41. gooddata_pipelines/provisioning/entities/workspaces/workspace.py +263 -0
  42. gooddata_pipelines/provisioning/entities/workspaces/workspace_data_filters.py +286 -0
  43. gooddata_pipelines/provisioning/entities/workspaces/workspace_data_parser.py +123 -0
  44. gooddata_pipelines/provisioning/entities/workspaces/workspace_data_validator.py +188 -0
  45. gooddata_pipelines/provisioning/provisioning.py +132 -0
  46. gooddata_pipelines/provisioning/utils/__init__.py +1 -0
  47. gooddata_pipelines/provisioning/utils/context_objects.py +32 -0
  48. gooddata_pipelines/provisioning/utils/exceptions.py +95 -0
  49. gooddata_pipelines/provisioning/utils/utils.py +80 -0
  50. gooddata_pipelines/py.typed +0 -0
  51. gooddata_pipelines-1.47.1.dev1.dist-info/METADATA +85 -0
  52. gooddata_pipelines-1.47.1.dev1.dist-info/RECORD +54 -0
  53. gooddata_pipelines-1.47.1.dev1.dist-info/WHEEL +4 -0
  54. gooddata_pipelines-1.47.1.dev1.dist-info/licenses/LICENSE.txt +1 -277
gooddata_pipelines/backup_and_restore/storage/s3_storage.py
@@ -0,0 +1,71 @@
+# (C) 2025 GoodData Corporation
+
+import os
+
+import boto3
+
+from gooddata_pipelines.backup_and_restore.models.storage import (
+    BackupRestoreConfig,
+    S3StorageConfig,
+)
+from gooddata_pipelines.backup_and_restore.storage.base_storage import (
+    BackupStorage,
+)
+
+
+class S3Storage(BackupStorage):
+    def __init__(self, conf: BackupRestoreConfig):
+        super().__init__(conf)
+
+        if not isinstance(conf.storage, S3StorageConfig):
+            raise ValueError("S3 storage config is required")
+
+        self._config = conf.storage
+        self._profile = self._config.profile
+        self._session = self._create_boto_session(self._profile)
+        self._resource = self._session.resource("s3")
+        self._bucket = self._resource.Bucket(self._config.bucket)  # type: ignore  # missing library stubs
+        suffix = "/" if not self._config.backup_path.endswith("/") else ""
+        self._backup_path = self._config.backup_path + suffix
+
+        self._verify_connection()
+
+    def _create_boto_session(self, profile: str) -> boto3.Session:
+        try:
+            return boto3.Session(profile_name=profile)
+        except Exception:
+            self.logger.warning(
+                f'AWS profile "{profile}" not found. Trying other fallback methods...'
+            )
+
+        return boto3.Session()
+
+    def _verify_connection(self) -> None:
+        """
+        Pings the S3 bucket to verify that the connection is working.
+        """
+        try:
+            # TODO: install boto3 s3 stubs
+            self._resource.meta.client.head_bucket(Bucket=self._config.bucket)
+        except Exception as e:
+            raise RuntimeError(
+                f"Failed to connect to S3 bucket {self._config.bucket}: {e}"
+            ) from e
+
+    def export(self, folder: str, org_id: str) -> None:
+        """Uploads the content of the folder to S3 as backup."""
+        storage_path = f"{self._config.bucket}/{self._backup_path}"
+        self.logger.info(f"Uploading {org_id} to {storage_path}")
+        folder = f"{folder}/{org_id}"
+        for subdir, _dirs, files in os.walk(folder):
+            # Create a key for the directory itself (kept even when empty).
+            export_path = (
+                f"{self._backup_path}{org_id}/{subdir[len(folder) + 1 :]}/"
+            )
+            self._bucket.put_object(Key=export_path)
+
+            for file in files:
+                full_path = os.path.join(subdir, file)
+                with open(full_path, "rb") as data:
+                    export_path = f"{self._backup_path}{org_id}/{full_path[len(folder) + 1 :]}"
+                    self._bucket.put_object(Key=export_path, Body=data)
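For orientation, a minimal usage sketch of `S3Storage` follows. The field names on `S3StorageConfig` (`bucket`, `backup_path`, `profile`) are inferred from the attributes `__init__` reads above; the actual model definitions live in `models/storage.py`, which is not shown in this hunk, so treat the constructor calls as assumptions.

# Hypothetical usage sketch -- config field names are inferred from the
# attributes S3Storage reads; verify against models/storage.py.
from gooddata_pipelines.backup_and_restore.models.storage import (
    BackupRestoreConfig,
    S3StorageConfig,
)
from gooddata_pipelines.backup_and_restore.storage.s3_storage import S3Storage

config = BackupRestoreConfig(
    storage=S3StorageConfig(
        bucket="my-backup-bucket",   # assumed field name
        backup_path="org-backups",   # a "/" suffix is appended automatically
        profile="default",           # missing profiles fall back to the default chain
    )
)

storage = S3Storage(config)  # pings the bucket via head_bucket on init
storage.export("/tmp/backups", "my_org_id")  # uploads /tmp/backups/my_org_id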
gooddata_pipelines/logger/__init__.py
@@ -0,0 +1,8 @@
+# (C) 2025 GoodData Corporation
+
+from .logger import LoggerLike, LogObserver
+
+__all__ = [
+    "LoggerLike",
+    "LogObserver",
+]
gooddata_pipelines/logger/logger.py
@@ -0,0 +1,115 @@
+# (C) 2025 GoodData Corporation
+
+"""Logging observer for the GoodData Pipelines SDK.
+
+This module provides a singleton observer class `LogObserver` that allows
+subscribing logger-like objects to receive log messages. The observer emits
+unformatted log messages to all subscribed objects, which should implement
+the `LoggerLike` protocol.
+"""
+
+from enum import Enum
+from typing import Any, Protocol
+
+
+class SingletonMeta(type):
+    _instances: dict = {}
+
+    def __call__(cls, *args: Any, **kwargs: Any) -> Any:
+        if cls not in cls._instances:
+            instance = super().__call__(*args, **kwargs)
+            cls._instances[cls] = instance
+        return cls._instances[cls]
+
+
+class Severity(Enum):
+    """Severity levels for logging."""
+
+    INFO = "info"
+    WARNING = "warning"
+    ERROR = "error"
+
+
+class LoggerLike(Protocol):
+    """A protocol for a logger-like object.
+
+    This protocol defines the methods that a logger-like object should implement
+    to be compatible with the `LogObserver`. It includes methods for logging
+    messages at different severity levels: info, warning, and error.
+    """
+
+    def info(self, *args: Any, **kwargs: Any) -> None: ...
+
+    def warning(self, *args: Any, **kwargs: Any) -> None: ...
+
+    def error(self, *args: Any, **kwargs: Any) -> None: ...
+
+
+class LogObserver(metaclass=SingletonMeta):
+    """Singleton observer class for logging messages.
+
+    Emits unformatted log messages to all subscribed logger-like objects.
+    """
+
+    # TODO: in the future we might want to add a timestamp or other metadata
+    # (severity...). Currently that is left to subscribers to handle.
+
+    # TODO: with errors we dump the context as a string into the message;
+    # that could be improved (either by passing the context as a separate
+    # argument or by handling the processing here).
+
+    def __init__(self) -> None:
+        self.subscribers: list[LoggerLike] = []
+
+    def subscribe(self, subscriber: LoggerLike) -> None:
+        """Subscribe a logger-like object to receive log messages.
+
+        Args:
+            subscriber (LoggerLike): An object that implements the LoggerLike
+                protocol.
+        Returns:
+            None
+        """
+        self.subscribers.append(subscriber)
+
+    def unsubscribe(self, subscriber: LoggerLike) -> None:
+        """Unsubscribe a logger-like object from receiving log messages.
+
+        Args:
+            subscriber (LoggerLike): An object that implements the LoggerLike
+                protocol.
+
+        Returns:
+            None
+        """
+        self.subscribers.remove(subscriber)
+
+    def _notify(self, severity: Severity, msg: str) -> None:
+        """Notify all subscribers with a log message.
+
+        Args:
+            severity (Severity): The severity level of the log message.
+            msg (str): The log message to be sent to subscribers.
+
+        Returns:
+            None
+        """
+        for subscriber in self.subscribers:
+            if severity == Severity.INFO:
+                subscriber.info(msg)
+            elif severity == Severity.WARNING:
+                subscriber.warning(msg)
+            elif severity == Severity.ERROR:
+                subscriber.error(msg)
+
+    def info(self, msg: str) -> None:
+        """Log an info message."""
+        self._notify(Severity.INFO, msg)
+
+    def warning(self, msg: str) -> None:
+        """Log a warning message."""
+        self._notify(Severity.WARNING, msg)
+
+    def error(self, msg: str) -> None:
+        """Log an error message."""
+        self._notify(Severity.ERROR, msg)
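Since the standard library's `logging.Logger` already exposes `info`, `warning`, and `error`, it satisfies the `LoggerLike` protocol. A minimal sketch of wiring it into the observer (logger name and messages are illustrative):

# Minimal sketch: subscribe a standard library logger to the observer.
import logging

from gooddata_pipelines.logger import LogObserver

logging.basicConfig(level=logging.INFO)
pipeline_logger = logging.getLogger("gooddata_pipelines")

observer = LogObserver()
observer.subscribe(pipeline_logger)

observer.info("provisioning started")  # forwarded to pipeline_logger.info
assert LogObserver() is observer       # SingletonMeta: one shared instance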
gooddata_pipelines/provisioning/__init__.py
@@ -0,0 +1,31 @@
+# (C) 2025 GoodData Corporation
+
+from .entities.users.models.permissions import (
+    PermissionFullLoad,
+    PermissionIncrementalLoad,
+)
+from .entities.users.models.user_groups import (
+    UserGroupFullLoad,
+    UserGroupIncrementalLoad,
+)
+from .entities.users.models.users import (
+    UserFullLoad,
+    UserIncrementalLoad,
+)
+from .entities.users.permissions import PermissionProvisioner
+from .entities.users.user_groups import UserGroupProvisioner
+from .entities.users.users import UserProvisioner
+from .entities.workspaces.workspace import WorkspaceProvisioner
+
+__all__ = [
+    "PermissionFullLoad",
+    "PermissionIncrementalLoad",
+    "PermissionProvisioner",
+    "UserFullLoad",
+    "UserGroupFullLoad",
+    "UserGroupIncrementalLoad",
+    "UserGroupProvisioner",
+    "UserIncrementalLoad",
+    "UserProvisioner",
+    "WorkspaceProvisioner",
+]
gooddata_pipelines/provisioning/assets/wdf_setting.json
@@ -0,0 +1,14 @@
+{
+  "data": {
+    "attributes": {
+      "filterValues": []
+    },
+    "id": "<wdf_setting_id>",
+    "relationships": {
+      "workspaceDataFilter": {
+        "data": { "id": "<wdf_id>", "type": "workspaceDataFilter" }
+      }
+    },
+    "type": "workspaceDataFilterSetting"
+  }
+}
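This asset is a template with `<wdf_setting_id>` and `<wdf_id>` left as placeholders. A hedged sketch of filling it in at runtime follows; how the package itself consumes the asset is not shown in this diff, so the substitution step is illustrative only.

# Illustrative only: load the packaged template and substitute the
# placeholder ids before sending the payload to the API.
import json
from importlib import resources

template = json.loads(
    resources.files("gooddata_pipelines.provisioning.assets")
    .joinpath("wdf_setting.json")
    .read_text()
)
template["data"]["id"] = "my_setting_id"  # replaces <wdf_setting_id>
template["data"]["relationships"]["workspaceDataFilter"]["data"]["id"] = (
    "my_wdf_id"  # replaces <wdf_id>
)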
gooddata_pipelines/provisioning/entities/__init__.py
@@ -0,0 +1 @@
+# (C) 2025 GoodData Corporation

gooddata_pipelines/provisioning/entities/user_data_filters/__init__.py
@@ -0,0 +1 @@
+# (C) 2025 GoodData Corporation

gooddata_pipelines/provisioning/entities/user_data_filters/models/__init__.py
@@ -0,0 +1 @@
+# (C) 2025 GoodData Corporation
gooddata_pipelines/provisioning/entities/user_data_filters/models/udf_models.py
@@ -0,0 +1,32 @@
+# (C) 2025 GoodData Corporation
+
+"""This module defines data models for user data filters in a GoodData workspace."""
+
+# TODO: consider using attrs instead of dataclasses for these models.
+# Dataclasses have different functionality per Python version (not package version).
+
+from dataclasses import dataclass, field
+
+
+@dataclass
+class UserDataFilterGroup:
+    udf_id: str
+    udf_values: list[str]
+
+
+@dataclass
+class WorkspaceUserDataFilters:
+    workspace_id: str
+    user_data_filters: list["UserDataFilterGroup"] = field(default_factory=list)
+
+
+@dataclass
+class UserDataFilterFullLoad:
+    workspace_id: str
+    udf_id: str
+    udf_value: str
+
+
+@dataclass
+class UserDataFilterIncrementalLoad(UserDataFilterFullLoad):
+    is_active: bool
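To show how the two load models relate, illustrative rows follow (all values hypothetical): the incremental model extends a full-load row with an `is_active` flag.

# Illustrative instances; the values are made up.
full_row = UserDataFilterFullLoad(
    workspace_id="ws_1", udf_id="user_1", udf_value="42"
)
incremental_row = UserDataFilterIncrementalLoad(
    workspace_id="ws_1", udf_id="user_1", udf_value="42", is_active=True
)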
gooddata_pipelines/provisioning/entities/user_data_filters/user_data_filters.py
@@ -0,0 +1,221 @@
+# (C) 2025 GoodData Corporation
+
+"""Module for provisioning user data filters in GoodData workspaces.
+
+This module provides the `UserDataFilterProvisioner` class, which is responsible
+for creating, updating, and deleting user data filters in GoodData workspaces.
+"""
+
+import re
+
+from gooddata_sdk.catalog.workspace.entity_model.user_data_filter import (
+    CatalogEntityIdentifier,
+    CatalogUserDataFilter,
+    CatalogUserDataFilterAttributes,
+    CatalogUserDataFilterRelationships,
+)
+
+from gooddata_pipelines.provisioning.entities.user_data_filters.models.udf_models import (
+    UserDataFilterFullLoad,
+    UserDataFilterGroup,
+    UserDataFilterIncrementalLoad,
+    WorkspaceUserDataFilters,
+)
+from gooddata_pipelines.provisioning.provisioning import Provisioning
+from gooddata_pipelines.provisioning.utils.exceptions import ContextException
+
+
+class UserDataFilterProvisioner(
+    Provisioning[UserDataFilterFullLoad, UserDataFilterIncrementalLoad]
+):
+    """Provisioning class for user data filters in GoodData workspaces.
+
+    This class handles the creation, update, and deletion of user data filters
+    based on the provided source data.
+
+    Requires setting the `ldm_column_name` and `maql_column_name`
+    attributes before calling the `provision` method.
+
+    Usage:
+        ```
+        provisioner = UserDataFilterProvisioner(api, source_group)
+        provisioner.set_ldm_column_name("ldm_column")
+        provisioner.set_maql_column_name("maql_column")
+        provisioner.provision()
+        ```
+    """
+
+    source_group_full: list[UserDataFilterFullLoad]
+    source_group_incremental: list[UserDataFilterIncrementalLoad]
+    ldm_column_name: str = ""
+    maql_column_name: str = ""
+
+    def set_ldm_column_name(self, ldm_column_name: str) -> None:
+        """Set the LDM column name for user data filters.
+
+        Args:
+            ldm_column_name (str): The LDM column name to set.
+        """
+        self.ldm_column_name = ldm_column_name
+
+    def set_maql_column_name(self, maql_column_name: str) -> None:
+        """Set the MAQL column name for user data filters.
+
+        Args:
+            maql_column_name (str): The MAQL column name to set.
+        """
+        self.maql_column_name = maql_column_name
+
+    @staticmethod
+    def _group_db_user_data_filters_by_ws_id(
+        user_data_filters: list[UserDataFilterFullLoad],
+    ) -> list[WorkspaceUserDataFilters]:
+        """Group user data filters by workspace ID and user ID."""
+        ws_map: dict[str, dict[str, set[str]]] = {}
+
+        for udf in user_data_filters:
+            ws_map.setdefault(udf.workspace_id, {}).setdefault(
+                udf.udf_id, set()
+            ).add(str(udf.udf_value))
+
+        result: list[WorkspaceUserDataFilters] = []
+
+        for ws_id, udf_dict in ws_map.items():
+            udf_groups = [
+                UserDataFilterGroup(udf_id=udf_id, udf_values=list(values))
+                for udf_id, values in udf_dict.items()
+            ]
+            result.append(
+                WorkspaceUserDataFilters(
+                    workspace_id=ws_id, user_data_filters=udf_groups
+                )
+            )
+        return result
+
+    @staticmethod
+    def _extract_numbers_from_maql(maql: str) -> list[str]:
+        """Extract numbers from a MAQL string."""
+        numbers = re.findall(r'"\d+"', maql)
+        return [number.strip('"') for number in numbers]
+
+    def _skip_user_data_filter_update(
+        self, existing_udf: list[CatalogUserDataFilter], udf_value: list[str]
+    ) -> bool:
+        """Check if the user data filter update can be skipped."""
+        if not existing_udf:
+            return False
+        existing_udfs = self._extract_numbers_from_maql(
+            existing_udf[0].attributes.maql
+        )
+        return set(udf_value) == set(existing_udfs)
+
+    def _create_user_data_filters(
+        self, user_data_filter_ids_to_create: list[WorkspaceUserDataFilters]
+    ) -> None:
+        """Create or update user data filters in GoodData workspaces."""
+        for workspace_user_data_filter in user_data_filter_ids_to_create:
+            workspace_id = workspace_user_data_filter.workspace_id
+            user_data_filters = workspace_user_data_filter.user_data_filters
+
+            gd_user_data_filters: list[CatalogUserDataFilter] = (
+                self._api.list_user_data_filters(workspace_id)
+            )
+
+            gd_udf_ids = {
+                user.relationships.user["data"].id
+                for user in gd_user_data_filters
+                if user.relationships and user.relationships.user
+            }
+
+            db_udf_ids = {udf.udf_id for udf in user_data_filters}
+
+            udf_ids_to_delete: set[str] = gd_udf_ids.difference(db_udf_ids)
+            self._delete_user_data_filters(workspace_id, udf_ids_to_delete)
+
+            udf_group: UserDataFilterGroup
+            for udf_group in user_data_filters:
+                udf_id: str = udf_group.udf_id
+                udf_values: list[str] = udf_group.udf_values
+
+                existing_udf: list[CatalogUserDataFilter] = [
+                    udf for udf in gd_user_data_filters if udf.id == udf_id
+                ]
+                if self._skip_user_data_filter_update(existing_udf, udf_values):
+                    continue
+
+                formatted_udf_values = '", "'.join(
+                    str(value) for value in udf_values
+                )
+                maql = f'{self.maql_column_name} IN ("{formatted_udf_values}")'
+
+                attributes = CatalogUserDataFilterAttributes(maql=maql)
+                relationships = CatalogUserDataFilterRelationships(
+                    labels={
+                        "data": [
+                            CatalogEntityIdentifier(
+                                id=self.ldm_column_name, type="label"
+                            )
+                        ]
+                    },
+                    user={
+                        "data": CatalogEntityIdentifier(id=udf_id, type="user")
+                    },
+                )
+                user_data_filter = CatalogUserDataFilter(
+                    id=udf_id,
+                    attributes=attributes,
+                    relationships=relationships,
+                )
+
+                try:
+                    self._api.create_or_update_user_data_filter(
+                        workspace_id, user_data_filter
+                    )
+                    self.logger.info(
+                        "Created or updated user data filters for user with id "
+                        + f"{udf_id} for client with id {workspace_id}"
+                    )
+                except Exception as e:
+                    raise ContextException(
+                        f"Failed to create user data filters: {e}",
+                        udf_group,
+                        user_data_filter,
+                    ) from e
+
+    def _delete_user_data_filters(
+        self, workspace_id: str, udf_ids_to_delete: set[str]
+    ) -> None:
+        """Delete user data filters in GoodData workspaces."""
+        for udf_id in udf_ids_to_delete:
+            try:
+                self._api.delete_user_data_filter(workspace_id, udf_id)
+                self.logger.info(
+                    f"Deleted user data filters for user with id {udf_id}"
+                )
+            except Exception as e:
+                raise ContextException(
+                    f"Failed to delete user data filters: {e}"
+                ) from e
+
+    def _provision_full_load(self) -> None:
+        """Provision user data filters in GoodData workspaces."""
+
+        if not self.maql_column_name:
+            raise ContextException(
+                "MAQL column name is not set. Please set it before provisioning."
+            )
+        if not self.ldm_column_name:
+            raise ContextException(
+                "LDM column name is not set. Please set it before provisioning."
+            )
+
+        grouped_db_user_data_filters = (
+            self._group_db_user_data_filters_by_ws_id(self.source_group_full)
+        )
+        self._create_user_data_filters(grouped_db_user_data_filters)
+
+        self.logger.info("User data filters provisioning completed")
+
+    def _provision_incremental_load(self) -> None:
+        """Provision user data filters in GoodData workspaces."""
+        raise NotImplementedError("Not implemented yet.")
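To make the filter assembly in `_create_user_data_filters` concrete, here is the MAQL string it produces for a hypothetical column and value set (names and values illustrative):

# Worked example of the MAQL construction above; the column reference
# and values are hypothetical.
maql_column_name = "{label/region_id}"
udf_values = ["101", "102"]

formatted_udf_values = '", "'.join(str(value) for value in udf_values)
maql = f'{maql_column_name} IN ("{formatted_udf_values}")'

assert maql == '{label/region_id} IN ("101", "102")'
# Note: _skip_user_data_filter_update compares only numeric values
# extracted via the r'"\d+"' regex, so non-numeric values never match
# an existing filter and always trigger an update.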
gooddata_pipelines/provisioning/entities/users/__init__.py
@@ -0,0 +1 @@
+# (C) 2025 GoodData Corporation

gooddata_pipelines/provisioning/entities/users/models/__init__.py
@@ -0,0 +1 @@
+# (C) 2025 GoodData Corporation