collibra-connector 1.0.18__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -25,15 +25,295 @@
25
25
  #
26
26
  #
27
27
  """
28
-
29
28
  Collibra Connector Library
30
29
  ~~~~~~~~~~~~~~~~~~~~~~~~~~
31
30
 
32
- Uses the Collibra API to connect and interact with Collibra's data governance platform.
33
- This library provides a simple interface to handle connection and URLs
31
+ A professional Python SDK for the Collibra Data Governance Center API.
32
+
33
+ Features:
34
+ - Full type safety with Pydantic models
35
+ - Async support for high-performance batch operations
36
+ - Declarative lineage builder
37
+ - OpenTelemetry integration for observability
38
+ - CLI tool for terminal operations
39
+ - Mock engine for testing
40
+
41
+ Basic Usage:
42
+ >>> from collibra_connector import CollibraConnector
43
+ >>>
44
+ >>> conn = CollibraConnector(
45
+ ... api="https://your-instance.collibra.com",
46
+ ... username="user",
47
+ ... password="pass"
48
+ ... )
49
+ >>>
50
+ >>> # All methods return typed Pydantic models
51
+ >>> asset = conn.asset.get_asset("uuid")
52
+ >>> print(asset.name) # Full IDE autocompletion
53
+ >>> print(asset.status.name)
54
+
55
+ Async Usage:
56
+ >>> from collibra_connector import AsyncCollibraConnector
57
+ >>> import asyncio
58
+ >>>
59
+ >>> async def main():
60
+ ... async with AsyncCollibraConnector(...) as conn:
61
+ ... # Fetch 100 assets in parallel
62
+ ... assets = await conn.asset.get_assets_batch(ids)
63
+ >>>
64
+ >>> asyncio.run(main())
65
+
66
+ Lineage Builder:
67
+ >>> from collibra_connector.lineage import LineageBuilder, LineageNode
68
+ >>>
69
+ >>> builder = LineageBuilder(conn)
70
+ >>> source = LineageNode.table("raw.orders")
71
+ >>> target = LineageNode.table("warehouse.orders")
72
+ >>> builder.add_edge(source, target, "is source for")
73
+ >>> builder.commit(domain_id="lineage-domain-uuid")
34
74
 
75
+ Testing:
76
+ >>> from collibra_connector.testing import MockCollibraConnector
77
+ >>>
78
+ >>> mock = MockCollibraConnector()
79
+ >>> mock.asset.add_mock_asset({"name": "Test"})
80
+ >>> asset = mock.asset.get_asset("any-id")
35
81
  """
36
82
 
37
83
  from .connector import CollibraConnector
84
+ from .api.Exceptions import (
85
+ CollibraAPIError,
86
+ UnauthorizedError,
87
+ ForbiddenError,
88
+ NotFoundError,
89
+ ServerError,
90
+ )
91
+ from .helpers import (
92
+ Paginator,
93
+ PaginatedResponse,
94
+ BatchProcessor,
95
+ BatchResult,
96
+ CachedMetadata,
97
+ DataTransformer,
98
+ DataFrameExporter,
99
+ timed_cache,
100
+ )
101
+ from .models import (
102
+ # Base classes
103
+ BaseCollibraModel,
104
+ ResourceReference,
105
+ NamedResource,
106
+ TimestampMixin,
107
+ # Core models
108
+ AssetModel,
109
+ DomainModel,
110
+ CommunityModel,
111
+ UserModel,
112
+ StatusModel,
113
+ # Type models
114
+ AssetTypeModel,
115
+ DomainTypeModel,
116
+ AttributeTypeModel,
117
+ RelationTypeModel,
118
+ RoleModel,
119
+ # Data models
120
+ AttributeModel,
121
+ RelationModel,
122
+ ResponsibilityModel,
123
+ CommentModel,
124
+ # Search models
125
+ SearchResultModel,
126
+ SearchResource,
127
+ # Workflow models
128
+ WorkflowDefinitionModel,
129
+ WorkflowInstanceModel,
130
+ WorkflowTaskModel,
131
+ # Profile models
132
+ AssetProfileModel,
133
+ RelationsGrouped,
134
+ RelationSummary,
135
+ ResponsibilitySummary,
136
+ # Paginated responses
137
+ PaginatedResponseModel,
138
+ AssetList,
139
+ DomainList,
140
+ CommunityList,
141
+ UserList,
142
+ AttributeList,
143
+ RelationList,
144
+ SearchResults,
145
+ # Factory functions
146
+ parse_asset,
147
+ parse_assets,
148
+ parse_domain,
149
+ parse_domains,
150
+ parse_community,
151
+ parse_communities,
152
+ parse_user,
153
+ parse_users,
154
+ parse_attribute,
155
+ parse_attributes,
156
+ parse_relation,
157
+ parse_relations,
158
+ parse_search_results,
159
+ )
160
+
161
+ # Async connector (optional - requires httpx)
162
+ try:
163
+ from .async_connector import AsyncCollibraConnector
164
+ except ImportError:
165
+ AsyncCollibraConnector = None # type: ignore
166
+
167
+ # Lineage builder
168
+ from .lineage import (
169
+ LineageBuilder,
170
+ LineageNode,
171
+ LineageEdge,
172
+ LineageCommitResult,
173
+ LineageDirection,
174
+ LineageRelationType,
175
+ )
176
+
177
+ # Telemetry (optional - requires opentelemetry)
178
+ try:
179
+ from .telemetry import (
180
+ enable_telemetry,
181
+ disable_telemetry,
182
+ is_telemetry_available,
183
+ is_telemetry_enabled,
184
+ traced,
185
+ traced_async,
186
+ span,
187
+ TracedCollibraConnector,
188
+ get_current_trace_id,
189
+ get_current_span_id,
190
+ add_span_attributes,
191
+ record_exception,
192
+ )
193
+ except ImportError:
194
+ enable_telemetry = None # type: ignore
195
+ disable_telemetry = None # type: ignore
196
+ is_telemetry_available = lambda: False # type: ignore
197
+ is_telemetry_enabled = lambda: False # type: ignore
198
+ traced = None # type: ignore
199
+ traced_async = None # type: ignore
200
+ span = None # type: ignore
201
+ TracedCollibraConnector = None # type: ignore
202
+ get_current_trace_id = None # type: ignore
203
+ get_current_span_id = None # type: ignore
204
+ add_span_attributes = None # type: ignore
205
+ record_exception = None # type: ignore
206
+
207
+ # Testing utilities
208
+ from .testing import (
209
+ MockCollibraConnector,
210
+ mock_collibra,
211
+ mock_collibra_context,
212
+ CollibraTestCase,
213
+ MockDataStore,
214
+ )
38
215
 
39
- __version__ = "0.1.0"
216
+ __version__ = "1.1.0"
217
+ __all__ = [
218
+ # Main connector
219
+ "CollibraConnector",
220
+ "AsyncCollibraConnector",
221
+ # Exceptions
222
+ "CollibraAPIError",
223
+ "UnauthorizedError",
224
+ "ForbiddenError",
225
+ "NotFoundError",
226
+ "ServerError",
227
+ # Helpers
228
+ "Paginator",
229
+ "PaginatedResponse",
230
+ "BatchProcessor",
231
+ "BatchResult",
232
+ "CachedMetadata",
233
+ "DataTransformer",
234
+ "DataFrameExporter",
235
+ "timed_cache",
236
+ # Base models
237
+ "BaseCollibraModel",
238
+ "ResourceReference",
239
+ "NamedResource",
240
+ "TimestampMixin",
241
+ # Core models
242
+ "AssetModel",
243
+ "DomainModel",
244
+ "CommunityModel",
245
+ "UserModel",
246
+ "StatusModel",
247
+ # Type models
248
+ "AssetTypeModel",
249
+ "DomainTypeModel",
250
+ "AttributeTypeModel",
251
+ "RelationTypeModel",
252
+ "RoleModel",
253
+ # Data models
254
+ "AttributeModel",
255
+ "RelationModel",
256
+ "ResponsibilityModel",
257
+ "CommentModel",
258
+ # Search models
259
+ "SearchResultModel",
260
+ "SearchResource",
261
+ # Workflow models
262
+ "WorkflowDefinitionModel",
263
+ "WorkflowInstanceModel",
264
+ "WorkflowTaskModel",
265
+ # Profile models
266
+ "AssetProfileModel",
267
+ "RelationsGrouped",
268
+ "RelationSummary",
269
+ "ResponsibilitySummary",
270
+ # Paginated responses
271
+ "PaginatedResponseModel",
272
+ "AssetList",
273
+ "DomainList",
274
+ "CommunityList",
275
+ "UserList",
276
+ "AttributeList",
277
+ "RelationList",
278
+ "SearchResults",
279
+ # Factory functions
280
+ "parse_asset",
281
+ "parse_assets",
282
+ "parse_domain",
283
+ "parse_domains",
284
+ "parse_community",
285
+ "parse_communities",
286
+ "parse_user",
287
+ "parse_users",
288
+ "parse_attribute",
289
+ "parse_attributes",
290
+ "parse_relation",
291
+ "parse_relations",
292
+ "parse_search_results",
293
+ # Lineage
294
+ "LineageBuilder",
295
+ "LineageNode",
296
+ "LineageEdge",
297
+ "LineageCommitResult",
298
+ "LineageDirection",
299
+ "LineageRelationType",
300
+ # Telemetry
301
+ "enable_telemetry",
302
+ "disable_telemetry",
303
+ "is_telemetry_available",
304
+ "is_telemetry_enabled",
305
+ "traced",
306
+ "traced_async",
307
+ "span",
308
+ "TracedCollibraConnector",
309
+ "get_current_trace_id",
310
+ "get_current_span_id",
311
+ "add_span_attributes",
312
+ "record_exception",
313
+ # Testing
314
+ "MockCollibraConnector",
315
+ "mock_collibra",
316
+ "mock_collibra_context",
317
+ "CollibraTestCase",
318
+ "MockDataStore",
319
+ ]
@@ -78,7 +78,7 @@ class Asset(BaseAPI):
78
78
  "domainId": domain_id,
79
79
  "displayName": display_name,
80
80
  "typeId": type_id,
81
- "id": id,
81
+ "id": _id,
82
82
  "statusId": status_id,
83
83
  "excludedFromAutoHyperlink": excluded_from_auto_hyperlink,
84
84
  "typePublicId": type_public_id
@@ -377,7 +377,8 @@ class Asset(BaseAPI):
377
377
  community_id: str = None,
378
378
  asset_type_ids: list = None,
379
379
  domain_id: str = None,
380
- limit: int = 1000
380
+ limit: int = 1000,
381
+ offset: int = 0
381
382
  ):
382
383
  """
383
384
  Find assets with optional filters.
@@ -385,9 +386,10 @@ class Asset(BaseAPI):
385
386
  :param asset_type_ids: Optional list of asset type IDs to filter by.
386
387
  :param domain_id: Optional domain ID to filter by.
387
388
  :param limit: Maximum number of results per page.
389
+ :param offset: First result to retrieve.
388
390
  :return: List of assets matching the criteria.
389
391
  """
390
- params = {"limit": limit}
392
+ params = {"limit": limit, "offset": offset}
391
393
 
392
394
  if community_id:
393
395
  if not isinstance(community_id, str):
@@ -448,3 +450,299 @@ class Asset(BaseAPI):
448
450
  response = self._get(url=f"{self.__base_api}/activities", params=params)
449
451
  result = self._handle_response(response)
450
452
  return result.get("results", [])
453
+
454
def get_full_profile(
    self,
    asset_id: str,
    include_attributes: bool = True,
    include_relations: bool = True,
    include_responsibilities: bool = True,
    include_comments: bool = False,
    include_activities: bool = False
):
    """
    Get a complete profile of an asset including all related information.

    The asset itself is mandatory: any error raised by ``get_asset``
    propagates. Every other section is best-effort: if fetching it fails,
    the section keeps its empty default and the failure is logged at DEBUG
    level instead of aborting the whole profile.

    Args:
        asset_id: The UUID of the asset.
        include_attributes: Include asset attributes (default: True).
        include_relations: Include incoming/outgoing relations (default: True).
        include_responsibilities: Include responsibility assignments (default: True).
        include_comments: Include comments on the asset (default: False).
        include_activities: Include activity history (default: False).

    Returns:
        AssetProfileModel containing:
        - asset: AssetModel with basic asset information
        - attributes: Dict of attribute name -> value
        - relations: RelationsGrouped with 'outgoing' and 'incoming' relations
        - responsibilities: List of ResponsibilitySummary objects
        - comments: List of CommentModel objects (if requested)
        - activities: List of activities (if requested)

    Raises:
        ValueError: If asset_id is empty or is not a valid UUID.

    Example:
        >>> profile = connector.asset.get_full_profile("asset-uuid")
        >>> print(profile.asset.name)
        >>> print(profile.attributes.get('Description'))
        >>> print(profile.data_steward)
    """
    if not asset_id:
        raise ValueError("asset_id is required")

    try:
        uuid.UUID(asset_id)
    except (ValueError, AttributeError, TypeError) as exc:
        # uuid.UUID raises AttributeError/TypeError for non-string input;
        # report every malformed id the same way.
        raise ValueError("asset_id must be a valid UUID") from exc

    import logging
    logger = logging.getLogger(__name__)

    # Get the connector reference for accessing other APIs
    connector = self._BaseAPI__connector

    from ..models import (
        AssetProfileModel,
        RelationsGrouped,
        ResponsibilitySummary,
        CommentModel
    )

    # 1. Get the asset itself (mandatory - errors propagate)
    asset_data = self.get_asset(asset_id)
    attributes_dict = {}
    relations_data = {"outgoing": {}, "incoming": {}, "outgoing_count": 0, "incoming_count": 0}
    responsibilities_list = []
    comments_list = []
    activities_list = []

    # 2. Get attributes
    if include_attributes:
        try:
            attributes_dict = connector.attribute.get_attributes_as_dict(asset_id)
        except Exception:
            # Attributes are optional
            logger.debug("Could not fetch attributes for asset %s", asset_id, exc_info=True)

    # 3. Get relations
    if include_relations:
        try:
            relations_data = connector.relation.get_asset_relations(
                asset_id,
                include_type_details=True
            )
        except Exception:
            # Relations are optional
            logger.debug("Could not fetch relations for asset %s", asset_id, exc_info=True)

    # 4. Get responsibilities
    if include_responsibilities:
        try:
            import requests
            url = f"{connector.api}/responsibilities"
            params = {"resourceIds": asset_id, "limit": 50}
            # The filter must actually be sent; without params= the request
            # is not restricted to this asset.
            response = requests.get(
                url,
                params=params,
                auth=connector.auth,
                timeout=connector.timeout
            )
            if response.status_code == 200:
                data = response.json()
                for resp in data.get('results', []):
                    # "or {}" also covers keys that are present but null
                    role = (resp.get('role') or {}).get('name', 'Unknown')
                    owner = resp.get('owner') or {}
                    owner_name = f"{owner.get('firstName', '')} {owner.get('lastName', '')}".strip()
                    if not owner_name:
                        owner_name = owner.get('name', 'Unknown')
                    responsibilities_list.append(ResponsibilitySummary(
                        role=role,
                        owner=owner_name,
                        owner_id=owner.get('id')
                    ))
            else:
                logger.debug(
                    "Responsibilities request for asset %s returned HTTP %s",
                    asset_id,
                    response.status_code
                )
        except Exception:
            # Responsibilities are optional
            logger.debug("Could not fetch responsibilities for asset %s", asset_id, exc_info=True)

    # 5. Get comments
    if include_comments:
        try:
            comments_result = connector.comment.get_comments(asset_id)
            for comment_data in comments_result.get('results', []):
                try:
                    comments_list.append(CommentModel.model_validate(comment_data))
                except Exception:
                    # Skip a single malformed comment, keep the rest
                    logger.debug("Skipping unparsable comment on asset %s", asset_id, exc_info=True)
        except Exception:
            # Comments are optional
            logger.debug("Could not fetch comments for asset %s", asset_id, exc_info=True)

    # 6. Get activities
    if include_activities:
        try:
            activities_list = self.get_asset_activities(asset_id)
        except Exception:
            # Activities are optional
            logger.debug("Could not fetch activities for asset %s", asset_id, exc_info=True)

    # Create and return AssetProfileModel
    return AssetProfileModel(
        asset=asset_data,
        attributes=attributes_dict,
        relations=RelationsGrouped(**relations_data),
        responsibilities=responsibilities_list,
        comments=comments_list,
        activities=activities_list
    )
585
+
586
def get_full_profile_flat(self, asset_id: str):
    """
    Get a flattened profile of an asset suitable for export to CSV/DataFrame.

    Returns a dictionary with all values as simple types (strings, numbers, lists).
    The profile comes from ``get_full_profile``; its parts are read uniformly,
    whether they are model objects (attribute access) or plain dicts, and a
    missing nested reference (type, status, domain) yields ``None`` instead
    of raising.

    Args:
        asset_id: The UUID of the asset.

    Returns:
        Flattened dictionary with all asset information:
        basic fields, one ``attr_<name>`` entry per attribute, relation
        counts and per-type summaries, and a ``responsibilities`` string.

    Example:
        >>> flat = connector.asset.get_full_profile_flat("asset-uuid")
        >>> import pandas as pd
        >>> df = pd.DataFrame([flat])
    """
    import re

    def _field(obj, attr, key=None, default=None):
        # Read `attr` from an object, or `key` (falling back to `attr`)
        # from a dict; None and missing values both become `default`.
        if obj is None:
            return default
        if isinstance(obj, dict):
            value = obj.get(attr if key is None else key)
        else:
            value = getattr(obj, attr, None)
        return default if value is None else value

    profile = self.get_full_profile(asset_id)
    asset = _field(profile, "asset")

    flat = {
        # Basic info
        "id": _field(asset, "id"),
        "name": _field(asset, "name"),
        "display_name": _field(asset, "display_name", key="displayName"),
        "type": _field(asset, "type_name"),
        "type_id": _field(_field(asset, "type"), "id"),
        "status": _field(asset, "status_name"),
        "status_id": _field(_field(asset, "status"), "id"),
        "domain": _field(asset, "domain_name"),
        "domain_id": _field(_field(asset, "domain"), "id"),
        "created_on": _field(asset, "created_on", key="createdOn"),
        "last_modified_on": _field(asset, "last_modified_on", key="lastModifiedOn"),
    }

    # Add attributes with prefix
    html_tag = re.compile(r'<[^>]+>')
    for attr_name, attr_value in _field(profile, "attributes", default={}).items():
        # Clean HTML from description
        if attr_name == "Description" and isinstance(attr_value, str):
            attr_value = html_tag.sub('', attr_value)
        flat[f"attr_{attr_name.lower().replace(' ', '_')}"] = attr_value

    # Add relation counts
    relations = _field(profile, "relations")
    flat["relations_outgoing_count"] = _field(relations, "outgoing_count", default=0)
    flat["relations_incoming_count"] = _field(relations, "incoming_count", default=0)

    # Add relation summaries ("<relation type>: <number of related assets>")
    flat["relations_outgoing_summary"] = "; ".join(
        f"{rel_type}: {len(targets)}"
        for rel_type, targets in _field(relations, "outgoing", default={}).items()
    )
    flat["relations_incoming_summary"] = "; ".join(
        f"{rel_type}: {len(sources)}"
        for rel_type, sources in _field(relations, "incoming", default={}).items()
    )

    # Add responsibilities
    flat["responsibilities"] = "; ".join(
        f"{_field(r, 'role')}: {_field(r, 'owner')}"
        for r in _field(profile, "responsibilities", default=[])
    )

    return flat
648
+
649
def add_tags(self, asset_id: str, tags: list):
    """
    Attach one or more tags to an asset.

    Posts ``{"tagNames": tags}`` to the asset's ``/tags`` endpoint.
    :param asset_id: The ID of the asset.
    :param tags: Non-empty list of tag names (strings) to add.
    :return: Whatever ``_handle_response`` returns for the API response.
    :raises ValueError: If asset_id is empty, or tags is empty or not a list.
    """
    if not asset_id:
        raise ValueError("asset_id is required")
    if not (tags and isinstance(tags, list)):
        raise ValueError("tags must be a non-empty list of strings")

    endpoint = f"{self.__base_api}/{asset_id}/tags"
    payload = {"tagNames": tags}
    return self._handle_response(self._post(url=endpoint, data=payload))
666
+
667
def remove_tags(self, asset_id: str, tags: list):
    """
    Remove tags from an asset.

    Sends ``DELETE <base>/<asset_id>/tags`` with the tag list itself as the
    JSON body (``["tag1", "tag2"]``). The call goes through ``requests``
    directly, using the connector's auth and timeout; per the original
    author's note, ``BaseAPI._delete`` cannot carry a request body.
    :param asset_id: The ID of the asset.
    :param tags: Non-empty list of tag names (strings) to remove.
    :return: Whatever ``_handle_response`` returns for the API response.
    :raises ValueError: If asset_id is empty, or tags is empty or not a list.
    """
    if not asset_id:
        raise ValueError("asset_id is required")
    if not (tags and isinstance(tags, list)):
        raise ValueError("tags must be a non-empty list of strings")

    endpoint = f"{self.__base_api}/{asset_id}/tags"

    import requests
    # Auth and timeout come from the owning connector
    owner = self._BaseAPI__connector

    request_options = {
        "json": tags,
        "auth": owner.auth,
        "timeout": owner.timeout,
        "headers": {"Content-Type": "application/json"},
    }
    api_response = requests.delete(endpoint, **request_options)

    return self._handle_response(api_response)
699
+
700
def add_attachment(self, asset_id: str, file_path: str):
    """
    Upload a local file as an attachment of an asset.

    Posts a multipart form to ``<connector.api>/attachments`` with the file
    content plus ``resourceId`` and ``resourceType`` ("Asset") fields, using
    the connector's auth and timeout.
    :param asset_id: The ID of the asset.
    :param file_path: Path to the file to upload.
    :return: Whatever ``_handle_response`` returns for the API response.
    :raises ValueError: If asset_id is empty.
    :raises FileNotFoundError: If file_path does not exist.
    """
    import os

    if not asset_id:
        raise ValueError("asset_id is required")
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    import requests

    conn = self._BaseAPI__connector
    endpoint = f"{conn.api}/attachments"
    upload_name = os.path.basename(file_path)

    # The upload happens inside the with-block so the handle is still open
    # while requests streams it, and is closed afterwards.
    with open(file_path, 'rb') as handle:
        multipart = {
            'file': (upload_name, handle, 'application/octet-stream'),
            'resourceId': (None, str(asset_id)),
            'resourceType': (None, 'Asset')
        }
        api_response = requests.post(
            endpoint,
            files=multipart,
            auth=conn.auth,
            timeout=conn.timeout
        )

    return self._handle_response(api_response)
734
+
735
def get_attachments(self, asset_id: str):
    """
    Get attachments for an asset.

    Queries ``<connector.api>/attachments`` filtered by ``resourceId`` and
    ``resourceType`` ("Asset").
    :param asset_id: The ID of the asset.
    :return: List of attachments (the ``results`` entry of the response,
        or an empty list when that entry is absent).
    :raises ValueError: If asset_id is empty.
    """
    # Same guard as the other asset-scoped methods: an empty id would
    # otherwise send a query without a meaningful resource filter.
    if not asset_id:
        raise ValueError("asset_id is required")

    url = f"{self._BaseAPI__connector.api}/attachments"
    params = {
        "resourceId": asset_id,
        "resourceType": "Asset"
    }

    response = self._get(url=url, params=params)
    return self._handle_response(response).get("results", [])