PyPI - collibra-connector - Versions diffs - 1.0.18__py3-none-any.whl → 1.1.0__py3-none-any.whl - Mend

collibra-connector 1.0.18__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

collibra_connector/__init__.py +284 -4
collibra_connector/api/Asset.py +301 -3
collibra_connector/api/Attribute.py +204 -0
collibra_connector/api/Base.py +2 -2
collibra_connector/api/Community.py +1 -1
collibra_connector/api/Relation.py +216 -0
collibra_connector/api/Responsibility.py +5 -5
collibra_connector/api/Search.py +102 -0
collibra_connector/api/__init__.py +23 -13
collibra_connector/async_connector.py +930 -0
collibra_connector/cli.py +597 -0
collibra_connector/connector.py +270 -48
collibra_connector/helpers.py +845 -0
collibra_connector/lineage.py +716 -0
collibra_connector/models.py +897 -0
collibra_connector/py.typed +0 -0
collibra_connector/telemetry.py +576 -0
collibra_connector/testing.py +806 -0
collibra_connector-1.1.0.dist-info/METADATA +540 -0
collibra_connector-1.1.0.dist-info/RECORD +32 -0
collibra_connector-1.1.0.dist-info/entry_points.txt +2 -0
collibra_connector-1.0.18.dist-info/METADATA +0 -157
collibra_connector-1.0.18.dist-info/RECORD +0 -21
{collibra_connector-1.0.18.dist-info → collibra_connector-1.1.0.dist-info}/WHEEL +0 -0
{collibra_connector-1.0.18.dist-info → collibra_connector-1.1.0.dist-info}/licenses/LICENSE +0 -0
{collibra_connector-1.0.18.dist-info → collibra_connector-1.1.0.dist-info}/top_level.txt +0 -0

collibra_connector/__init__.py CHANGED Viewed

@@ -25,15 +25,295 @@
 #
 #
 """
 Collibra Connector Library
 ~~~~~~~~~~~~~~~~~~~~~~~~~~
-Uses the Collibra API to connect and interact with Collibra's data governance platform.
-This library provides a simple interface to handle connection and URLs
+A professional Python SDK for the Collibra Data Governance Center API.
+Features:
+    - Full type safety with Pydantic models
+    - Async support for high-performance batch operations
+    - Declarative lineage builder
+    - OpenTelemetry integration for observability
+    - CLI tool for terminal operations
+    - Mock engine for testing
+Basic Usage:
+    >>> from collibra_connector import CollibraConnector
+    >>>
+    >>> conn = CollibraConnector(
+    ...     api="https://your-instance.collibra.com",
+    ...     username="user",
+    ...     password="pass"
+    ... )
+    >>>
+    >>> # All methods return typed Pydantic models
+    >>> asset = conn.asset.get_asset("uuid")
+    >>> print(asset.name)  # Full IDE autocompletion
+    >>> print(asset.status.name)
+Async Usage:
+    >>> from collibra_connector import AsyncCollibraConnector
+    >>> import asyncio
+    >>>
+    >>> async def main():
+    ...     async with AsyncCollibraConnector(...) as conn:
+    ...         # Fetch 100 assets in parallel
+    ...         assets = await conn.asset.get_assets_batch(ids)
+    >>>
+    >>> asyncio.run(main())
+Lineage Builder:
+    >>> from collibra_connector.lineage import LineageBuilder, LineageNode
+    >>>
+    >>> builder = LineageBuilder(conn)
+    >>> source = LineageNode.table("raw.orders")
+    >>> target = LineageNode.table("warehouse.orders")
+    >>> builder.add_edge(source, target, "is source for")
+    >>> builder.commit(domain_id="lineage-domain-uuid")
+Testing:
+    >>> from collibra_connector.testing import MockCollibraConnector
+    >>>
+    >>> mock = MockCollibraConnector()
+    >>> mock.asset.add_mock_asset({"name": "Test"})
+    >>> asset = mock.asset.get_asset("any-id")
 """
 from .connector import CollibraConnector
+from .api.Exceptions import (
+    CollibraAPIError,
+    UnauthorizedError,
+    ForbiddenError,
+    NotFoundError,
+    ServerError,
+)
+from .helpers import (
+    Paginator,
+    PaginatedResponse,
+    BatchProcessor,
+    BatchResult,
+    CachedMetadata,
+    DataTransformer,
+    DataFrameExporter,
+    timed_cache,
+)
+from .models import (
+    # Base classes
+    BaseCollibraModel,
+    ResourceReference,
+    NamedResource,
+    TimestampMixin,
+    # Core models
+    AssetModel,
+    DomainModel,
+    CommunityModel,
+    UserModel,
+    StatusModel,
+    # Type models
+    AssetTypeModel,
+    DomainTypeModel,
+    AttributeTypeModel,
+    RelationTypeModel,
+    RoleModel,
+    # Data models
+    AttributeModel,
+    RelationModel,
+    ResponsibilityModel,
+    CommentModel,
+    # Search models
+    SearchResultModel,
+    SearchResource,
+    # Workflow models
+    WorkflowDefinitionModel,
+    WorkflowInstanceModel,
+    WorkflowTaskModel,
+    # Profile models
+    AssetProfileModel,
+    RelationsGrouped,
+    RelationSummary,
+    ResponsibilitySummary,
+    # Paginated responses
+    PaginatedResponseModel,
+    AssetList,
+    DomainList,
+    CommunityList,
+    UserList,
+    AttributeList,
+    RelationList,
+    SearchResults,
+    # Factory functions
+    parse_asset,
+    parse_assets,
+    parse_domain,
+    parse_domains,
+    parse_community,
+    parse_communities,
+    parse_user,
+    parse_users,
+    parse_attribute,
+    parse_attributes,
+    parse_relation,
+    parse_relations,
+    parse_search_results,
+)
+# Async connector (optional - requires httpx).
+# The import is guarded: if importing .async_connector raises ImportError, the
+# package still loads and AsyncCollibraConnector is bound to None instead of a
+# class, so callers must check for None before instantiating it.
+try:
+    from .async_connector import AsyncCollibraConnector
+except ImportError:
+    AsyncCollibraConnector = None  # type: ignore
+# Lineage builder
+from .lineage import (
+    LineageBuilder,
+    LineageNode,
+    LineageEdge,
+    LineageCommitResult,
+    LineageDirection,
+    LineageRelationType,
+)
+# Telemetry (optional - requires opentelemetry).
+# If importing .telemetry raises ImportError, every exported telemetry name is
+# still defined so that `from collibra_connector import ...` keeps working:
+#   - is_telemetry_available / is_telemetry_enabled become callables returning
+#     False, so they can be used for feature detection;
+#   - every other name is bound to None. These fallbacks are NOT callable:
+#     using e.g. `traced` as a decorator or `span` as a context manager in this
+#     state raises TypeError, so guard such uses with is_telemetry_available().
+try:
+    from .telemetry import (
+        enable_telemetry,
+        disable_telemetry,
+        is_telemetry_available,
+        is_telemetry_enabled,
+        traced,
+        traced_async,
+        span,
+        TracedCollibraConnector,
+        get_current_trace_id,
+        get_current_span_id,
+        add_span_attributes,
+        record_exception,
+    )
+except ImportError:
+    enable_telemetry = None  # type: ignore
+    disable_telemetry = None  # type: ignore
+    is_telemetry_available = lambda: False  # type: ignore
+    is_telemetry_enabled = lambda: False  # type: ignore
+    traced = None  # type: ignore
+    traced_async = None  # type: ignore
+    span = None  # type: ignore
+    TracedCollibraConnector = None  # type: ignore
+    get_current_trace_id = None  # type: ignore
+    get_current_span_id = None  # type: ignore
+    add_span_attributes = None  # type: ignore
+    record_exception = None  # type: ignore
+# Testing utilities
+from .testing import (
+    MockCollibraConnector,
+    mock_collibra,
+    mock_collibra_context,
+    CollibraTestCase,
+    MockDataStore,
+)
-__version__ = "0.1.0"
+__version__ = "1.1.0"
+__all__ = [
+    # Main connector
+    "CollibraConnector",
+    "AsyncCollibraConnector",
+    # Exceptions
+    "CollibraAPIError",
+    "UnauthorizedError",
+    "ForbiddenError",
+    "NotFoundError",
+    "ServerError",
+    # Helpers
+    "Paginator",
+    "PaginatedResponse",
+    "BatchProcessor",
+    "BatchResult",
+    "CachedMetadata",
+    "DataTransformer",
+    "DataFrameExporter",
+    "timed_cache",
+    # Base models
+    "BaseCollibraModel",
+    "ResourceReference",
+    "NamedResource",
+    "TimestampMixin",
+    # Core models
+    "AssetModel",
+    "DomainModel",
+    "CommunityModel",
+    "UserModel",
+    "StatusModel",
+    # Type models
+    "AssetTypeModel",
+    "DomainTypeModel",
+    "AttributeTypeModel",
+    "RelationTypeModel",
+    "RoleModel",
+    # Data models
+    "AttributeModel",
+    "RelationModel",
+    "ResponsibilityModel",
+    "CommentModel",
+    # Search models
+    "SearchResultModel",
+    "SearchResource",
+    # Workflow models
+    "WorkflowDefinitionModel",
+    "WorkflowInstanceModel",
+    "WorkflowTaskModel",
+    # Profile models
+    "AssetProfileModel",
+    "RelationsGrouped",
+    "RelationSummary",
+    "ResponsibilitySummary",
+    # Paginated responses
+    "PaginatedResponseModel",
+    "AssetList",
+    "DomainList",
+    "CommunityList",
+    "UserList",
+    "AttributeList",
+    "RelationList",
+    "SearchResults",
+    # Factory functions
+    "parse_asset",
+    "parse_assets",
+    "parse_domain",
+    "parse_domains",
+    "parse_community",
+    "parse_communities",
+    "parse_user",
+    "parse_users",
+    "parse_attribute",
+    "parse_attributes",
+    "parse_relation",
+    "parse_relations",
+    "parse_search_results",
+    # Lineage
+    "LineageBuilder",
+    "LineageNode",
+    "LineageEdge",
+    "LineageCommitResult",
+    "LineageDirection",
+    "LineageRelationType",
+    # Telemetry
+    "enable_telemetry",
+    "disable_telemetry",
+    "is_telemetry_available",
+    "is_telemetry_enabled",
+    "traced",
+    "traced_async",
+    "span",
+    "TracedCollibraConnector",
+    "get_current_trace_id",
+    "get_current_span_id",
+    "add_span_attributes",
+    "record_exception",
+    # Testing
+    "MockCollibraConnector",
+    "mock_collibra",
+    "mock_collibra_context",
+    "CollibraTestCase",
+    "MockDataStore",
+]

collibra_connector/api/Asset.py CHANGED Viewed

@@ -78,7 +78,7 @@ class Asset(BaseAPI):
             "domainId": domain_id,
             "displayName": display_name,
             "typeId": type_id,
-            "id": id,
+            "id": _id,
             "statusId": status_id,
             "excludedFromAutoHyperlink": excluded_from_auto_hyperlink,
             "typePublicId": type_public_id
@@ -377,7 +377,8 @@ class Asset(BaseAPI):
         community_id: str = None,
         asset_type_ids: list = None,
         domain_id: str = None,
-        limit: int = 1000
+        limit: int = 1000,
+        offset: int = 0
     ):
         """
         Find assets with optional filters.
@@ -385,9 +386,10 @@ class Asset(BaseAPI):
         :param asset_type_ids: Optional list of asset type IDs to filter by.
         :param domain_id: Optional domain ID to filter by.
         :param limit: Maximum number of results per page.
+        :param offset: First result to retrieve.
         :return: List of assets matching the criteria.
         """
-        params = {"limit": limit}
+        params = {"limit": limit, "offset": offset}
         if community_id:
             if not isinstance(community_id, str):
@@ -448,3 +450,299 @@ class Asset(BaseAPI):
         response = self._get(url=f"{self.__base_api}/activities", params=params)
         result = self._handle_response(response)
         return result.get("results", [])
+    def get_full_profile(
+        self,
+        asset_id: str,
+        include_attributes: bool = True,
+        include_relations: bool = True,
+        include_responsibilities: bool = True,
+        include_comments: bool = False,
+        include_activities: bool = False
+    ):
+        """
+        Get a complete profile of an asset including all related information.
+        This is a convenience method that gathers the asset itself plus its
+        attributes, relations, responsibilities, comments and activities through
+        the sibling API objects of the connector.
+        Only the base asset lookup is mandatory: a failure there propagates to the
+        caller. Every optional section is fetched on a best-effort basis; if its
+        request fails for any reason the section is left empty rather than
+        aborting the whole profile.
+        Args:
+            asset_id: The UUID of the asset.
+            include_attributes: Include asset attributes (default: True).
+            include_relations: Include incoming/outgoing relations (default: True).
+            include_responsibilities: Include responsibility assignments (default: True).
+            include_comments: Include comments on the asset (default: False).
+            include_activities: Include activity history (default: False).
+        Returns:
+            AssetProfileModel containing:
+                - asset: the value returned by get_asset(asset_id)
+                - attributes: Dict of attribute name -> value
+                - relations: RelationsGrouped with 'outgoing' and 'incoming' relations
+                - responsibilities: List of ResponsibilitySummary objects
+                - comments: List of CommentModel objects (if requested)
+                - activities: List of activities (if requested)
+        Raises:
+            ValueError: If asset_id is empty or is not a valid UUID string.
+        Example:
+            >>> profile = connector.asset.get_full_profile("asset-uuid")
+            >>> print(profile.asset.name)
+            >>> print(profile.attributes.get('Description'))
+            >>> print(profile.data_steward)
+        """
+        if not asset_id:
+            raise ValueError("asset_id is required")
+        try:
+            uuid.UUID(asset_id)
+        except (ValueError, AttributeError, TypeError) as exc:
+            # uuid.UUID raises AttributeError/TypeError for non-string input;
+            # report those as the same validation error as a malformed string.
+            raise ValueError("asset_id must be a valid UUID") from exc
+        # The connector is stored by BaseAPI under a name-mangled private
+        # attribute; it gives access to the sibling APIs and the HTTP settings.
+        connector = self._BaseAPI__connector
+        from ..models import (
+            AssetProfileModel,
+            RelationsGrouped,
+            ResponsibilitySummary,
+            CommentModel
+        )
+        # 1. Get the asset itself (mandatory: errors propagate)
+        asset_data = self.get_asset(asset_id)
+        # Defaults used for every section that is skipped or fails to load.
+        attributes_dict = {}
+        relations_data = {"outgoing": {}, "incoming": {}, "outgoing_count": 0, "incoming_count": 0}
+        responsibilities_list = []
+        comments_list = []
+        activities_list = []
+        # 2. Get attributes
+        if include_attributes:
+            try:
+                attributes_dict = connector.attribute.get_attributes_as_dict(asset_id)
+            except Exception:
+                pass  # Attributes are optional
+        # 3. Get relations
+        if include_relations:
+            try:
+                relations_data = connector.relation.get_asset_relations(
+                    asset_id,
+                    include_type_details=True
+                )
+            except Exception:
+                pass  # Relations are optional
+        # 4. Get responsibilities
+        if include_responsibilities:
+            try:
+                import requests
+                url = f"{connector.api}/responsibilities"
+                params = {"resourceIds": asset_id, "limit": 50}
+                # The query parameters must be sent with the request; without
+                # them the endpoint is queried unfiltered and the result is not
+                # restricted to this asset.
+                response = requests.get(
+                    url,
+                    params=params,
+                    auth=connector.auth,
+                    timeout=connector.timeout
+                )
+                if response.status_code == 200:
+                    data = response.json()
+                    for resp in data.get('results', []):
+                        # `or {}` tolerates an explicit JSON null for role/owner
+                        # so one odd entry does not discard the whole list.
+                        role = (resp.get('role') or {}).get('name', 'Unknown')
+                        owner = resp.get('owner') or {}
+                        owner_name = f"{owner.get('firstName', '')} {owner.get('lastName', '')}".strip()
+                        if not owner_name:
+                            owner_name = owner.get('name', 'Unknown')
+                        responsibilities_list.append(ResponsibilitySummary(
+                            role=role,
+                            owner=owner_name,
+                            owner_id=owner.get('id')
+                        ))
+            except Exception:
+                pass  # Responsibilities are optional
+        # 5. Get comments
+        if include_comments:
+            try:
+                comments_result = connector.comment.get_comments(asset_id)
+                for comment_data in comments_result.get('results', []):
+                    try:
+                        comments_list.append(CommentModel.model_validate(comment_data))
+                    except Exception:
+                        pass  # Skip a single comment that fails validation
+            except Exception:
+                pass  # Comments are optional
+        # 6. Get activities
+        if include_activities:
+            try:
+                activities_list = self.get_asset_activities(asset_id)
+            except Exception:
+                pass  # Activities are optional
+        # Create and return AssetProfileModel
+        return AssetProfileModel(
+            asset=asset_data,
+            attributes=attributes_dict,
+            relations=RelationsGrouped(**relations_data),
+            responsibilities=responsibilities_list,
+            comments=comments_list,
+            activities=activities_list
+        )
+    def get_full_profile_flat(self, asset_id: str):
+        """
+        Build a single-level dictionary describing an asset, for CSV/DataFrame export.
+        The profile is obtained from get_full_profile(asset_id) with its default
+        options and then flattened: basic asset fields, one "attr_<name>" entry per
+        attribute, relation counts and per-type summaries, and a responsibilities
+        summary string.
+        Args:
+            asset_id: The UUID of the asset.
+        Returns:
+            Flattened dictionary with all asset information.
+        Example:
+            >>> flat = connector.asset.get_full_profile_flat("asset-uuid")
+            >>> import pandas as pd
+            >>> df = pd.DataFrame([flat])
+        """
+        import re
+        profile = self.get_full_profile(asset_id)
+        asset = profile.asset
+        # NOTE(review): this method mixes attribute access (profile.asset.id) with
+        # mapping-style access (profile["asset"], profile.get(...), r['role']).
+        # That only works if the model classes support both - confirm against
+        # the models module.
+        flat = {
+            # Basic info
+            "id": asset.id,
+            "name": asset.name,
+            "display_name": asset.display_name,
+            "type": asset.type_name,
+            "type_id": asset.type.id,
+            "status": asset.status_name,
+            "status_id": asset.status.id,
+            "domain": asset.domain_name,
+            "domain_id": asset.domain.id,
+            "created_on": asset.created_on,
+            "last_modified_on": profile["asset"].get("lastModifiedOn"),
+        }
+        # Attributes: key is "attr_" + lower-cased name with spaces as underscores
+        for name, value in profile.get("attributes", {}).items():
+            if name == "Description" and isinstance(value, str):
+                # Strip HTML tags from the description text
+                value = re.sub(r'<[^>]+>', '', value)
+            key = f"attr_{name.lower().replace(' ', '_')}"
+            flat[key] = value
+        # Relation counts
+        relations = profile["relations"]
+        flat["relations_outgoing_count"] = relations.get("outgoing_count", 0)
+        flat["relations_incoming_count"] = relations.get("incoming_count", 0)
+        # Relation summaries: "<relation type>: <number of related assets>"
+        flat["relations_outgoing_summary"] = "; ".join(
+            f"{rel_type}: {len(targets)}"
+            for rel_type, targets in relations.get("outgoing", {}).items()
+        )
+        flat["relations_incoming_summary"] = "; ".join(
+            f"{rel_type}: {len(sources)}"
+            for rel_type, sources in relations.get("incoming", {}).items()
+        )
+        # Responsibilities: "<role>: <owner>"
+        flat["responsibilities"] = "; ".join(
+            f"{r['role']}: {r['owner']}" for r in profile.get("responsibilities", [])
+        )
+        return flat
+    def add_tags(self, asset_id: str, tags: list):
+        """
+        Attach one or more tags to an asset.
+        Sends a POST to "<assets base>/<asset_id>/tags" with the payload
+        {"tagNames": tags}.
+        :param asset_id: The ID of the asset.
+        :param tags: Non-empty list of tag names (strings) to add.
+        :return: The handled response from the API.
+        :raises ValueError: If asset_id is empty, or tags is empty or not a list.
+        """
+        if not asset_id:
+            raise ValueError("asset_id is required")
+        if not (tags and isinstance(tags, list)):
+            raise ValueError("tags must be a non-empty list of strings")
+        endpoint = f"{self.__base_api}/{asset_id}/tags"
+        payload = {"tagNames": tags}
+        return self._handle_response(self._post(url=endpoint, data=payload))
+    def remove_tags(self, asset_id: str, tags: list):
+        """
+        Detach one or more tags from an asset.
+        Issues a DELETE to "<assets base>/<asset_id>/tags" whose JSON body is the
+        list of tag names.
+        :param asset_id: The ID of the asset.
+        :param tags: Non-empty list of tag names (strings) to remove.
+        :return: The handled response from the API.
+        :raises ValueError: If asset_id is empty, or tags is empty or not a list.
+        """
+        if not asset_id:
+            raise ValueError("asset_id is required")
+        if not (tags and isinstance(tags, list)):
+            raise ValueError("tags must be a non-empty list of strings")
+        endpoint = f"{self.__base_api}/{asset_id}/tags"
+        # The request needs a body, which (per the original author's note)
+        # BaseAPI._delete does not support, so `requests` is called directly
+        # with the connector's auth and timeout.
+        import requests
+        conn = self._BaseAPI__connector
+        # NOTE(review): the body is sent as a bare JSON list, whereas add_tags
+        # sends {"tagNames": [...]}. Confirm the expected shape against the
+        # Collibra REST API reference for DELETE /assets/{assetId}/tags.
+        response = requests.delete(
+            endpoint,
+            json=tags,
+            auth=conn.auth,
+            timeout=conn.timeout,
+            headers={"Content-Type": "application/json"}
+        )
+        return self._handle_response(response)
+    def add_attachment(self, asset_id: str, file_path: str):
+        """
+        Upload a local file as an attachment of an asset.
+        The file is posted as multipart form data to "<api>/attachments" together
+        with the form fields resourceId (the asset ID) and resourceType ("Asset").
+        :param asset_id: The ID of the asset.
+        :param file_path: Path to the file to upload.
+        :return: The handled response from the API.
+        :raises ValueError: If asset_id is empty.
+        :raises FileNotFoundError: If file_path does not exist.
+        """
+        import os
+        import requests
+        if not asset_id:
+            raise ValueError("asset_id is required")
+        if not os.path.exists(file_path):
+            raise FileNotFoundError(f"File not found: {file_path}")
+        connector = self._BaseAPI__connector
+        endpoint = f"{connector.api}/attachments"
+        upload_name = os.path.basename(file_path)
+        # The request is sent inside the `with` block so the handle stays open
+        # while requests streams it and is closed right afterwards.
+        with open(file_path, 'rb') as handle:
+            # A (None, value) tuple makes requests send a plain form field
+            # instead of a file part.
+            multipart = {
+                'file': (upload_name, handle, 'application/octet-stream'),
+                'resourceId': (None, str(asset_id)),
+                'resourceType': (None, 'Asset')
+            }
+            response = requests.post(
+                endpoint,
+                files=multipart,
+                auth=connector.auth,
+                timeout=connector.timeout
+            )
+        return self._handle_response(response)
+    def get_attachments(self, asset_id: str):
+        """
+        Get attachments for an asset.
+        Queries "<api>/attachments" filtered by resourceId (the asset ID) and
+        resourceType "Asset", and returns the "results" entry of the response.
+        :param asset_id: The ID of the asset.
+        :return: List of attachments (empty list if the response has no "results").
+        :raises ValueError: If asset_id is empty.
+        """
+        # Same guard as the other asset methods: without an ID the resourceId
+        # filter would be missing and the query would not be scoped to one asset.
+        if not asset_id:
+            raise ValueError("asset_id is required")
+        url = f"{self._BaseAPI__connector.api}/attachments"
+        params = {
+            "resourceId": asset_id,
+            "resourceType": "Asset"
+        }
+        response = self._get(url=url, params=params)
+        return self._handle_response(response).get("results", [])

collibra-connector 1.0.18__py3-none-any.whl → 1.1.0__py3-none-any.whl

collibra-connector 1.0.18__py3-none-any.whl → 1.1.0__py3-none-any.whl