gooddata-pipelines 1.49.1.dev2__py3-none-any.whl → 1.50.1.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gooddata_pipelines/__init__.py +7 -1
- gooddata_pipelines/api/gooddata_api.py +0 -54
- gooddata_pipelines/backup_and_restore/backup_manager.py +42 -64
- gooddata_pipelines/backup_and_restore/constants.py +3 -7
- gooddata_pipelines/backup_and_restore/models/storage.py +4 -5
- gooddata_pipelines/provisioning/entities/users/models/permissions.py +23 -79
- gooddata_pipelines/provisioning/entities/users/models/user_groups.py +23 -50
- gooddata_pipelines/provisioning/entities/users/models/users.py +9 -49
- gooddata_pipelines/provisioning/entities/users/permissions.py +14 -6
- gooddata_pipelines/provisioning/entities/users/user_groups.py +7 -1
- gooddata_pipelines/provisioning/entities/users/users.py +3 -0
- gooddata_pipelines/provisioning/entities/workspaces/models.py +16 -15
- gooddata_pipelines/provisioning/entities/workspaces/workspace.py +52 -5
- gooddata_pipelines/provisioning/entities/workspaces/workspace_data_parser.py +9 -6
- gooddata_pipelines/provisioning/provisioning.py +24 -6
- gooddata_pipelines/provisioning/utils/context_objects.py +6 -6
- gooddata_pipelines/provisioning/utils/utils.py +3 -15
- gooddata_pipelines/utils/__init__.py +9 -0
- gooddata_pipelines/utils/rate_limiter.py +64 -0
- {gooddata_pipelines-1.49.1.dev2.dist-info → gooddata_pipelines-1.50.1.dev1.dist-info}/METADATA +2 -2
- {gooddata_pipelines-1.49.1.dev2.dist-info → gooddata_pipelines-1.50.1.dev1.dist-info}/RECORD +23 -21
- {gooddata_pipelines-1.49.1.dev2.dist-info → gooddata_pipelines-1.50.1.dev1.dist-info}/WHEEL +0 -0
- {gooddata_pipelines-1.49.1.dev2.dist-info → gooddata_pipelines-1.50.1.dev1.dist-info}/licenses/LICENSE.txt +0 -0
gooddata_pipelines/provisioning/entities/users/models/users.py

@@ -5,10 +5,8 @@ from typing import Any
 from gooddata_sdk.catalog.user.entity_model.user import CatalogUser
 from pydantic import BaseModel
 
-from gooddata_pipelines.provisioning.utils.utils import SplitMixin
 
-
-class BaseUser(BaseModel, SplitMixin):
+class BaseUser(BaseModel):
     """Base class containing shared user fields and functionality."""
 
     user_id: str
@@ -18,21 +16,6 @@ class BaseUser(BaseModel, SplitMixin):
     auth_id: str | None
     user_groups: list[str]
 
-    @classmethod
-    def _create_from_dict_data(
-        cls, user_data: dict[str, Any], delimiter: str = ","
-    ) -> dict[str, Any]:
-        """Helper method to extract common data from dict."""
-        user_groups = cls.split(user_data["user_groups"], delimiter=delimiter)
-        return {
-            "user_id": user_data["user_id"],
-            "firstname": user_data["firstname"],
-            "lastname": user_data["lastname"],
-            "email": user_data["email"],
-            "auth_id": user_data["auth_id"],
-            "user_groups": user_groups,
-        }
-
     @classmethod
     def _create_from_sdk_data(cls, obj: CatalogUser) -> dict[str, Any]:
         """Helper method to extract common data from SDK object."""
@@ -68,47 +51,24 @@ class BaseUser(BaseModel, SplitMixin):
         )
 
 
-class UserIncrementalLoad(BaseUser):
-    """
-
-    is_active: bool
-
-    @classmethod
-    def from_list_of_dicts(
-        cls, data: list[dict[str, Any]], delimiter: str = ","
-    ) -> list["UserIncrementalLoad"]:
-        """Creates a list of User objects from list of dicts."""
-        converted_users = []
-        for user in data:
-            base_data = cls._create_from_dict_data(user, delimiter)
-            base_data["is_active"] = user["is_active"]
-            converted_users.append(cls(**base_data))
-        return converted_users
+class UserFullLoad(BaseUser):
+    """Input validator for full load of user provisioning."""
 
     @classmethod
-    def from_sdk_obj(cls, obj: CatalogUser) -> "UserIncrementalLoad":
+    def from_sdk_obj(cls, obj: CatalogUser) -> "UserFullLoad":
         """Creates GDUserTarget from CatalogUser SDK object."""
         base_data = cls._create_from_sdk_data(obj)
-        base_data["is_active"] = True
         return cls(**base_data)
 
 
-class UserFullLoad(BaseUser):
-    """
+class UserIncrementalLoad(BaseUser):
+    """Input validator for incremental load of user provisioning."""
 
-
-    def from_list_of_dicts(
-        cls, data: list[dict[str, Any]], delimiter: str = ","
-    ) -> list["UserFullLoad"]:
-        """Creates a list of User objects from list of dicts."""
-        converted_users = []
-        for user in data:
-            base_data = cls._create_from_dict_data(user, delimiter)
-            converted_users.append(cls(**base_data))
-        return converted_users
+    is_active: bool
 
     @classmethod
-    def from_sdk_obj(cls, obj: CatalogUser) -> "UserFullLoad":
+    def from_sdk_obj(cls, obj: CatalogUser) -> "UserIncrementalLoad":
         """Creates GDUserTarget from CatalogUser SDK object."""
         base_data = cls._create_from_sdk_data(obj)
+        base_data["is_active"] = True
         return cls(**base_data)
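The split into UserFullLoad and UserIncrementalLoad moves list handling out of the models, so delimiter handling now lives with the caller. A minimal sketch of the resulting usage; the values are hypothetical, and it assumes BaseUser keeps the firstname/lastname/email fields referenced by the removed helper:

from gooddata_pipelines.provisioning.entities.users.models.users import (
    UserFullLoad,
    UserIncrementalLoad,
)

full = UserFullLoad(
    user_id="jane.doe",
    firstname="Jane",
    lastname="Doe",
    email="jane.doe@example.com",
    auth_id=None,
    user_groups=["analysts"],
)

# Incremental records additionally carry is_active, which marks the user
# for creation/update (True) or removal (False).
incremental = UserIncrementalLoad(is_active=False, **full.model_dump())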
gooddata_pipelines/provisioning/entities/users/permissions.py

@@ -6,10 +6,10 @@ from typing import TypeVar
 
 from gooddata_pipelines.api.exceptions import GoodDataApiException
 from gooddata_pipelines.provisioning.entities.users.models.permissions import (
+    EntityType,
     PermissionDeclaration,
     PermissionFullLoad,
     PermissionIncrementalLoad,
-    PermissionType,
     TargetsPermissionDict,
     WSPermissionsDeclarations,
 )
@@ -28,12 +28,18 @@ class PermissionProvisioner(
     """Provisioning class for user permissions in GoodData workspaces.
 
     This class handles the provisioning of user permissions based on the provided
-    source data.
+    source data. Use the `full_load` or `incremental_load`
+    methods to run the provisioning.
     """
 
     source_group_incremental: list[PermissionIncrementalLoad]
     source_group_full: list[PermissionFullLoad]
 
+    FULL_LOAD_TYPE: type[PermissionFullLoad] = PermissionFullLoad
+    INCREMENTAL_LOAD_TYPE: type[PermissionIncrementalLoad] = (
+        PermissionIncrementalLoad
+    )
+
     def _get_ws_declaration(self, ws_id: str) -> PermissionDeclaration:
         users: TargetsPermissionDict = {}
         user_groups: TargetsPermissionDict = {}
@@ -47,7 +53,7 @@ class PermissionProvisioner(
             )
             target_dict = (
                 users
-                if permission_type == PermissionType.user.value
+                if permission_type == EntityType.user.value
                 else user_groups
             )
 
@@ -105,11 +111,13 @@ class PermissionProvisioner(
         self, permission: PermissionFullLoad | PermissionIncrementalLoad
     ) -> None:
         """Validates if the permission is correctly defined."""
-        if permission.
-            self._api.get_user(
+        if permission.entity_type == EntityType.user:
+            self._api.get_user(
+                permission.entity_id, error_message="User not found"
+            )
         else:
             self._api.get_user_group(
-                permission.
+                permission.entity_id, error_message="User group not found"
             )
 
         self._api.get_workspace(
gooddata_pipelines/provisioning/entities/users/user_groups.py

@@ -21,13 +21,19 @@ class UserGroupProvisioner(
     """Provisioning class for user groups in GoodData workspaces.
 
     This class handles the creation, update, and deletion of user groups
-    based on the provided source data.
+    based on the provided source data. Use the `full_load` or `incremental_load`
+    methods to run the provisioning.
     """
 
     source_group_incremental: list[UserGroupIncrementalLoad]
     source_group_full: list[UserGroupFullLoad]
     upstream_user_groups: list[CatalogUserGroup]
 
+    FULL_LOAD_TYPE: type[UserGroupFullLoad] = UserGroupFullLoad
+    INCREMENTAL_LOAD_TYPE: type[UserGroupIncrementalLoad] = (
+        UserGroupIncrementalLoad
+    )
+
     @staticmethod
     def _is_changed(
         group: UserGroupModel, existing_group: CatalogUserGroup
gooddata_pipelines/provisioning/entities/users/users.py

@@ -30,6 +30,9 @@ class UserProvisioner(Provisioning[UserFullLoad, UserIncrementalLoad]):
     source_group_incremental: list[UserIncrementalLoad]
     source_group_full: list[UserFullLoad]
 
+    FULL_LOAD_TYPE: type[UserFullLoad] = UserFullLoad
+    INCREMENTAL_LOAD_TYPE: type[UserIncrementalLoad] = UserIncrementalLoad
+
     def __init__(self, host: str, token: str) -> None:
         super().__init__(host, token)
         self.upstream_user_cache: dict[UserId, UserModel] = {}
gooddata_pipelines/provisioning/entities/workspaces/models.py

@@ -1,29 +1,27 @@
 # (C) 2025 GoodData Corporation
 """Module containing models related to workspace provisioning in GoodData Cloud."""
 
-from dataclasses import dataclass, field
 from typing import Literal
 
+import attrs
 from pydantic import BaseModel, ConfigDict
 
 
-@dataclass
+@attrs.define
 class WorkspaceDataMaps:
     """Dataclass to hold various mappings related to workspace data."""
 
-    child_to_parent_id_map: dict[str, str] = field(default_factory=dict)
-    workspace_id_to_wdf_map: dict[str, dict[str, list[str]]] = field(
-        default_factory=dict
+    child_to_parent_id_map: dict[str, str] = attrs.field(factory=dict)
+    workspace_id_to_wdf_map: dict[str, dict[str, list[str]]] = attrs.field(
+        factory=dict
     )
-    parent_ids: set[str] = field(default_factory=set)
-    source_ids: set[str] = field(default_factory=set)
-    workspace_id_to_name_map: dict[str, str] = field(default_factory=dict)
-    upstream_ids: set[str] = field(default_factory=set)
+    parent_ids: set[str] = attrs.field(factory=set)
+    source_ids: set[str] = attrs.field(factory=set)
+    workspace_id_to_name_map: dict[str, str] = attrs.field(factory=dict)
+    upstream_ids: set[str] = attrs.field(factory=set)
 
 
-class WorkspaceFullLoad(BaseModel):
-    """Model representing input for provisioning of workspaces in GoodData Cloud."""
-
+class WorkspaceBase(BaseModel):
     model_config = ConfigDict(coerce_numbers_to_str=True)
 
     parent_id: str
@@ -33,10 +31,13 @@ class WorkspaceFullLoad(BaseModel):
     workspace_data_filter_values: list[str] | None = None
 
 
-class WorkspaceIncrementalLoad(BaseModel):
-    """
+class WorkspaceFullLoad(WorkspaceBase):
+    """Input validator for full load of workspace provisioning."""
+
+
+class WorkspaceIncrementalLoad(WorkspaceBase):
+    """Input validator for incremental load of workspace provisioning."""
 
-    # TODO: double check that the model loads the data correctly, write a test
     is_active: bool
 
 
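The move from dataclasses to attrs above is mechanical: field(default_factory=X) becomes attrs.field(factory=X). A standalone sketch of the pattern, not library code:

import attrs


@attrs.define
class Maps:
    # Each instance gets its own fresh container, exactly as with
    # dataclasses.field(default_factory=dict).
    child_to_parent: dict[str, str] = attrs.field(factory=dict)
    parent_ids: set[str] = attrs.field(factory=set)


a, b = Maps(), Maps()
a.parent_ids.add("parent_ws")
assert b.parent_ids == set()  # defaults are not shared between instances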
gooddata_pipelines/provisioning/entities/workspaces/workspace.py

@@ -35,11 +35,19 @@ class WorkspaceProvisioner(
     source_group_full: list[WorkspaceFullLoad]
     source_group_incremental: list[WorkspaceIncrementalLoad]
 
+    FULL_LOAD_TYPE: type[WorkspaceFullLoad] = WorkspaceFullLoad
+    INCREMENTAL_LOAD_TYPE: type[WorkspaceIncrementalLoad] = (
+        WorkspaceIncrementalLoad
+    )
+
+    upstream_group: list[CatalogWorkspace]
+
     def __init__(self, *args: str, **kwargs: str) -> None:
         """Creates an instance of the WorkspaceProvisioner.
 
         Calls the superclass constructor and initializes the validator, parser,
-        and maps for workspace data.
+        and maps for workspace data. Use the `full_load` or `incremental_load`
+        methods to run the provisioning.
         """
         super().__init__(*args, **kwargs)
         self.validator: WorkspaceDataValidator = WorkspaceDataValidator(
@@ -91,10 +99,11 @@ class WorkspaceProvisioner(
         workspace_ids_to_update: set[str],
         child_to_parent_map: dict[str, str],
         workspace_id_to_wdf_map: dict[str, dict[str, list[str]]],
+        source_group: list[WorkspaceFullLoad] | list[WorkspaceIncrementalLoad],
     ) -> None:
         action: Literal["CREATE", "UPDATE"]
 
-        for source_workspace in self.source_group_full:
+        for source_workspace in source_group:
             if source_workspace.workspace_id in workspace_ids_to_update:
                 action = "UPDATE"
             elif source_workspace.workspace_id in workspace_ids_to_create:
@@ -199,8 +208,8 @@ class WorkspaceProvisioner(
         )
 
         # Get upstream children of all parent workspaces.
-        self.upstream_group
-        self.
+        self.upstream_group = self._api.get_panther_children_workspaces(
+            self.maps.parent_ids
         )
 
         # Set maps that require upstream data.
@@ -234,6 +243,7 @@ class WorkspaceProvisioner(
             self.ids_to_update,
             self.maps.child_to_parent_id_map,
             self.maps.workspace_id_to_wdf_map,
+            self.source_group_full,
         )
 
         # Check WDF settings of ignored workspaces.
@@ -259,5 +269,42 @@ class WorkspaceProvisioner(
 
     def _provision_incremental_load(self) -> None:
         """Incremental workspace provisioning."""
+        # Set the maps based on the source data.
+        self.maps = self.parser.set_maps_based_on_source(
+            self.maps, self.source_group_incremental
+        )
+
+        # Get upstream children of all parent workspaces.
+        self.upstream_group = self._api.get_panther_children_workspaces(
+            self.maps.parent_ids
+        )
+
+        # Set maps that require upstream data.
+        self.maps = self.parser.set_maps_with_upstream_data(
+            self.maps, self.source_group_incremental, self.upstream_group
+        )
 
-
+        # Create an instance of WDF manager with the created maps.
+        self.wdf_manager = WorkspaceDataFilterManager(self._api, self.maps)
+
+        # Iterate through the source data and sort workspace ID to groups
+        ids_to_update: set[str] = set()
+        ids_to_delete: set[str] = set()
+
+        for workspace in self.source_group_incremental:
+            if workspace.is_active:
+                ids_to_update.add(workspace.workspace_id)
+            else:
+                ids_to_delete.add(workspace.workspace_id)
+
+        self._create_or_update_panther_workspaces(
+            set(),
+            ids_to_update,
+            self.maps.child_to_parent_id_map,
+            self.maps.workspace_id_to_wdf_map,
+            self.source_group_incremental,
+        )
+
+        self.delete_panther_workspaces(
+            ids_to_delete, self.maps.workspace_id_to_name_map
+        )
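The new _provision_incremental_load mirrors the full-load path but derives the update and delete sets from each record's is_active flag. A hedged usage sketch; the field names are inferred from the hunks above and the IDs and names are hypothetical:

from gooddata_pipelines.provisioning.entities.workspaces.models import (
    WorkspaceIncrementalLoad,
)

records = [
    # is_active=True -> create or update under the parent workspace
    WorkspaceIncrementalLoad(
        parent_id="parent_ws",
        workspace_id="child_a",
        workspace_name="Child A",
        is_active=True,
    ),
    # is_active=False -> delete
    WorkspaceIncrementalLoad(
        parent_id="parent_ws",
        workspace_id="child_b",
        workspace_name="Child B",
        is_active=False,
    ),
]

# provisioner = WorkspaceProvisioner(host, token)
# provisioner.incremental_load(records)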
gooddata_pipelines/provisioning/entities/workspaces/workspace_data_parser.py

@@ -9,6 +9,7 @@ from gooddata_sdk.catalog.workspace.entity_model.workspace import (
 from gooddata_pipelines.provisioning.entities.workspaces.models import (
     WorkspaceDataMaps,
     WorkspaceFullLoad,
+    WorkspaceIncrementalLoad,
 )
 
 
@@ -17,7 +18,7 @@ class WorkspaceDataParser:
 
     @staticmethod
     def _get_id_to_name_map(
-        source_group: list[WorkspaceFullLoad],
+        source_group: list[WorkspaceFullLoad] | list[WorkspaceIncrementalLoad],
         upstream_group: list[CatalogWorkspace],
     ) -> dict[str, str]:
         """Creates a map of workspace IDs to their names for all known workspaces."""
@@ -33,7 +34,7 @@ class WorkspaceDataParser:
 
     @staticmethod
     def _get_child_to_parent_map(
-        source_group: list[WorkspaceFullLoad],
+        source_group: list[WorkspaceFullLoad] | list[WorkspaceIncrementalLoad],
     ) -> dict[str, str]:
         """Creates a map of child workspace IDs to their parent workspace IDs."""
         child_to_parent_map: dict[str, str] = {
@@ -45,7 +46,8 @@ class WorkspaceDataParser:
 
     @staticmethod
     def _get_set_of_ids_from_source(
-        source_group: list[WorkspaceFullLoad]
+        source_group: list[WorkspaceFullLoad] | list[WorkspaceIncrementalLoad],
+        column_name: str,
     ) -> set[str]:
         """Creates a set of unique parent workspace IDs."""
         set_of_ids: set[str] = {
@@ -64,7 +66,8 @@ class WorkspaceDataParser:
         return set_of_ids
 
     def _get_child_to_wdfs_map(
-        self,
+        self,
+        source_group: list[WorkspaceFullLoad] | list[WorkspaceIncrementalLoad],
     ) -> dict[str, dict[str, list[str]]]:
         """Creates a map of child workspace IDs to their WDF IDs."""
         # TODO: Use objects or a more transparent data structure instead of this.
@@ -88,7 +91,7 @@ class WorkspaceDataParser:
     def set_maps_based_on_source(
         self,
         map_object: WorkspaceDataMaps,
-        source_group: list[WorkspaceFullLoad],
+        source_group: list[WorkspaceFullLoad] | list[WorkspaceIncrementalLoad],
     ) -> WorkspaceDataMaps:
         """Creates maps which are dependent on the source group only."""
         map_object.child_to_parent_id_map = self._get_child_to_parent_map(
@@ -109,7 +112,7 @@ class WorkspaceDataParser:
     def set_maps_with_upstream_data(
         self,
         map_object: WorkspaceDataMaps,
-        source_group: list[WorkspaceFullLoad],
+        source_group: list[WorkspaceFullLoad] | list[WorkspaceIncrementalLoad],
         upstream_group: list[CatalogWorkspace],
     ) -> WorkspaceDataMaps:
         """Creates maps which are dependent on both the source group and upstream group."""
gooddata_pipelines/provisioning/provisioning.py

@@ -24,6 +24,9 @@ class Provisioning(Generic[TFullLoadSourceData, TIncrementalSourceData]):
     source_group_full: list[TFullLoadSourceData]
     source_group_incremental: list[TIncrementalSourceData]
 
+    FULL_LOAD_TYPE: type[TFullLoadSourceData]
+    INCREMENTAL_LOAD_TYPE: type[TIncrementalSourceData]
+
     def __init__(self, host: str, token: str) -> None:
         self.source_id: set[str] = set()
         self.upstream_id: set[str] = set()
@@ -80,6 +83,17 @@ class Provisioning(Generic[TFullLoadSourceData, TIncrementalSourceData]):
             ids_to_create=ids_to_create,
         )
 
+    def _validate_source_data_type(
+        self,
+        source_data: list[TFullLoadSourceData] | list[TIncrementalSourceData],
+        model: type[TFullLoadSourceData] | type[TIncrementalSourceData],
+    ) -> None:
+        """Validates data type of the source data."""
+        if not all(isinstance(record, model) for record in source_data):
+            raise TypeError(
+                f"Not all elements in source data are instances of {model.__name__}"
+            )
+
     def _provision_incremental_load(self) -> None:
         raise NotImplementedError(
             "Provisioning method to be implemented in the subclass."
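The FULL_LOAD_TYPE/INCREMENTAL_LOAD_TYPE class attributes give the generic base class a concrete model to check against at runtime, as each subclass pins them above. A simplified sketch of the same pattern outside the library:

from typing import Generic, TypeVar

T = TypeVar("T")


class Loader(Generic[T]):
    EXPECTED_TYPE: type[T]  # subclasses pin the concrete model here

    def load(self, records: list[T]) -> None:
        # Reject mixed or mistyped input before any work is done.
        if not all(isinstance(r, self.EXPECTED_TYPE) for r in records):
            raise TypeError(
                f"Not all elements are instances of {self.EXPECTED_TYPE.__name__}"
            )


class IntLoader(Loader[int]):
    EXPECTED_TYPE = int


IntLoader().load([1, 2, 3])    # passes
# IntLoader().load([1, "2"])   # would raise TypeError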
@@ -100,11 +114,13 @@ class Provisioning(Generic[TFullLoadSourceData, TIncrementalSourceData]):
         That means:
         - All workspaces declared in the source data are created if missing, or
           updated to match the source data
-        - All workspaces not declared
+        - All child workspaces not declared under the parent workspace in the
+          source data are deleted
         """
-        self.source_group_full = source_data
 
         try:
+            self._validate_source_data_type(source_data, self.FULL_LOAD_TYPE)
+            self.source_group_full = source_data
             self._provision_full_load()
             self.logger.info("Provisioning completed.")
         except Exception as e:
@@ -116,12 +132,14 @@ class Provisioning(Generic[TFullLoadSourceData, TIncrementalSourceData]):
         """Runs incremental provisioning workflow with the provided source data.
 
         Incremental provisioning is used to modify a subset of the upstream workspaces
-        based on the source data provided.
+        based on the source data provided. Only changes requested in the source
+        data will be applied.
         """
-        # TODO: validate the data type of source group at runtime
-        self.source_group_incremental = source_data
-
         try:
+            self._validate_source_data_type(
+                source_data, self.INCREMENTAL_LOAD_TYPE
+            )
+            self.source_group_incremental = source_data
             self._provision_incremental_load()
             self.logger.info("Provisioning completed.")
         except Exception as e:
gooddata_pipelines/provisioning/utils/context_objects.py

@@ -16,10 +16,10 @@ class WorkspaceContext:
         wdf_id: str | None = None,
         wdf_values: list[str] | None = None,
     ):
-        self.workspace_id
-        self.workspace_name
-        self.wdf_id
-        self.wdf_values
+        self.workspace_id = workspace_id if workspace_id else "NA"
+        self.workspace_name = workspace_name
+        self.wdf_id = wdf_id
+        self.wdf_values = wdf_values
 
 
 class UserContext:
@@ -28,5 +28,5 @@ class UserContext:
 
     def __init__(self, user_id: str, user_groups: list[str]):
         """User context object, stringifies list of user groups"""
-        self.user_id
-        self.user_groups
+        self.user_id = user_id
+        self.user_groups = ",".join(user_groups)
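After the fix, the context objects actually store their constructor arguments, and UserContext joins the group list into one string. Illustrative behavior:

from gooddata_pipelines.provisioning.utils.context_objects import UserContext

ctx = UserContext(user_id="jane.doe", user_groups=["admins", "analysts"])
assert ctx.user_id == "jane.doe"
assert ctx.user_groups == "admins,analysts"  # list stringified on construction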
gooddata_pipelines/provisioning/utils/utils.py

@@ -2,7 +2,7 @@
 
 """Module for utilities used in GoodData Pipelines provisioning."""
 
-
+import attrs
 from requests import Response
 
 
@@ -61,20 +61,8 @@ class AttributesMixin:
         return attrs
 
 
-
-class SplitMixin:
-    def split(string_value: str, delimiter: str = ",") -> list[str]:
-        """
-        Splits a string by the given delimiter and returns a list of stripped values.
-        If the input is empty, returns an empty list.
-        """
-        if not string_value:
-            return []
-
-        return [value.strip() for value in string_value.split(delimiter)]
-
-
-class EntityGroupIds(BaseModel):
+@attrs.define
+class EntityGroupIds:
     ids_in_both_systems: set[str]
     ids_to_delete: set[str]
     ids_to_create: set[str]
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
# (C) 2025 GoodData Corporation
|
|
2
|
+
|
|
3
|
+
import time
|
|
4
|
+
import threading
|
|
5
|
+
import functools
|
|
6
|
+
from typing import Callable, Any, Literal
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class RateLimiter:
|
|
10
|
+
"""
|
|
11
|
+
Rate limiter usable as a decorator and as a context manager.
|
|
12
|
+
- Shared instance decorator: limiter = RateLimiter(); @limiter
|
|
13
|
+
- Per-function decorator: @RateLimiter(calls_per_second=2)
|
|
14
|
+
- Context manager: with RateLimiter(2): ...
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
def __init__(self, calls_per_second: float = 1.0) -> None:
|
|
18
|
+
if calls_per_second <= 0:
|
|
19
|
+
raise ValueError("calls_per_second must be greater than 0")
|
|
20
|
+
|
|
21
|
+
self.calls_per_second = calls_per_second
|
|
22
|
+
self.min_interval = 1.0 / calls_per_second
|
|
23
|
+
|
|
24
|
+
self._lock = threading.Lock()
|
|
25
|
+
self._last_call_time = 0.0
|
|
26
|
+
|
|
27
|
+
def wait_if_needed(self) -> float:
|
|
28
|
+
"""Sleep if needed to maintain the rate limit, return actual sleep time."""
|
|
29
|
+
with self._lock:
|
|
30
|
+
now = time.monotonic()
|
|
31
|
+
since_last = now - self._last_call_time
|
|
32
|
+
|
|
33
|
+
if since_last < self.min_interval:
|
|
34
|
+
sleep_time = self.min_interval - since_last
|
|
35
|
+
time.sleep(sleep_time)
|
|
36
|
+
self._last_call_time = time.monotonic()
|
|
37
|
+
return sleep_time
|
|
38
|
+
else:
|
|
39
|
+
self._last_call_time = now
|
|
40
|
+
return 0.0
|
|
41
|
+
|
|
42
|
+
# Decorator support
|
|
43
|
+
def __call__(self, func: Callable[..., Any]) -> Callable[..., Any]:
|
|
44
|
+
@functools.wraps(func)
|
|
45
|
+
def wrapper(*args: Any, **kwargs: Any) -> Any:
|
|
46
|
+
self.wait_if_needed()
|
|
47
|
+
return func(*args, **kwargs)
|
|
48
|
+
|
|
49
|
+
return wrapper
|
|
50
|
+
|
|
51
|
+
# Context manager support
|
|
52
|
+
def __enter__(self) -> "RateLimiter":
|
|
53
|
+
self.wait_if_needed()
|
|
54
|
+
return self
|
|
55
|
+
|
|
56
|
+
def __exit__(
|
|
57
|
+
self, exc_type: Any, exc_val: Any, exc_tb: Any
|
|
58
|
+
) -> Literal[False]:
|
|
59
|
+
return False
|
|
60
|
+
|
|
61
|
+
def reset(self) -> None:
|
|
62
|
+
"""Reset the limiter (useful in tests)."""
|
|
63
|
+
with self._lock:
|
|
64
|
+
self._last_call_time = 0.0
|
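The new RateLimiter supports the usage styles listed in its docstring; a short sketch (the throttled function is hypothetical):

from gooddata_pipelines.utils.rate_limiter import RateLimiter

limiter = RateLimiter(calls_per_second=2)


@limiter  # shared-instance decorator: all decorated functions share one budget
def fetch_page(page: int) -> None:
    print(f"fetching page {page}")


for page in range(5):
    fetch_page(page)  # calls are spaced at least 0.5 s apart

with limiter:  # context-manager form throttles the enclosed block
    print("one more throttled call")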
{gooddata_pipelines-1.49.1.dev2.dist-info → gooddata_pipelines-1.50.1.dev1.dist-info}/METADATA
RENAMED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: gooddata-pipelines
-Version: 1.49.1.dev2
+Version: 1.50.1.dev1
 Summary: GoodData Cloud lifecycle automation pipelines
 Author-email: GoodData <support@gooddata.com>
 License: MIT
@@ -8,7 +8,7 @@ License-File: LICENSE.txt
 Requires-Python: >=3.10
 Requires-Dist: boto3-stubs<2.0.0,>=1.39.3
 Requires-Dist: boto3<2.0.0,>=1.39.3
-Requires-Dist: gooddata-sdk~=1.
+Requires-Dist: gooddata-sdk~=1.50.1.dev1
 Requires-Dist: pydantic<3.0.0,>=2.11.3
 Requires-Dist: requests<3.0.0,>=2.32.3
 Requires-Dist: types-pyyaml<7.0.0,>=6.0.12.20250326