collibra-connector 1.0.19__py3-none-any.whl → 1.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,930 @@
1
+ """
2
+ Async Collibra Connector - Asynchronous API client using httpx.
3
+
4
+ This module provides an asynchronous version of CollibraConnector,
5
+ enabling parallel requests for massive performance improvements
6
+ in batch operations and migrations.
7
+
8
+ Example:
9
+ >>> import asyncio
10
+ >>> from collibra_connector import AsyncCollibraConnector
11
+ >>>
12
+ >>> async def main():
13
+ ... async with AsyncCollibraConnector(
14
+ ... api="https://your-instance.collibra.com",
15
+ ... username="user",
16
+ ... password="pass"
17
+ ... ) as conn:
18
+ ... # Fetch 100 assets in parallel
19
+ ... asset_ids = ["uuid1", "uuid2", ..., "uuid100"]
20
+ ... assets = await conn.asset.get_assets_batch(asset_ids)
21
+ ... print(f"Fetched {len(assets)} assets")
22
+ >>>
23
+ >>> asyncio.run(main())
24
+ """
25
+ from __future__ import annotations
26
+
27
+ import asyncio
28
+ import logging
29
+ import os
30
+ from typing import Any, Dict, List, Optional, TypeVar, Union, TYPE_CHECKING
31
+
32
+ try:
33
+ import httpx
34
+ HTTPX_AVAILABLE = True
35
+ except ImportError:
36
+ HTTPX_AVAILABLE = False
37
+
38
+ from .models import (
39
+ AssetModel,
40
+ AssetList,
41
+ DomainModel,
42
+ DomainList,
43
+ CommunityModel,
44
+ CommunityList,
45
+ UserModel,
46
+ UserList,
47
+ AttributeModel,
48
+ AttributeList,
49
+ RelationModel,
50
+ RelationList,
51
+ SearchResults,
52
+ SearchResultModel,
53
+ AssetProfileModel,
54
+ RelationsGrouped,
55
+ RelationSummary,
56
+ ResponsibilitySummary,
57
+ parse_asset,
58
+ parse_assets,
59
+ parse_domain,
60
+ parse_domains,
61
+ parse_community,
62
+ parse_communities,
63
+ parse_user,
64
+ parse_users,
65
+ parse_attribute,
66
+ parse_attributes,
67
+ parse_relation,
68
+ parse_relations,
69
+ parse_search_results,
70
+ )
71
+ from .api.Exceptions import (
72
+ UnauthorizedError,
73
+ ForbiddenError,
74
+ NotFoundError,
75
+ ServerError,
76
+ )
77
+
78
+
79
+ T = TypeVar('T')
80
+
81
+
82
class AsyncBaseAPI:
    """Base class for async API modules.

    An API module keeps a reference to the connector that owns it and
    forwards every HTTP verb to ``connector._request``. All verb helpers
    return whatever ``_request`` returns.
    """

    def __init__(self, connector: "AsyncCollibraConnector") -> None:
        """Bind the module to *connector* and cache its ``api`` URL."""
        self._connector = connector
        self._base_url = connector.api

    async def _get(
        self,
        endpoint: str,
        params: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """Make async GET request.

        Args:
            endpoint: Path appended by the connector to its API URL.
            params: Optional query-string parameters.
        """
        return await self._connector._request("GET", endpoint, params=params)

    async def _post(
        self,
        endpoint: str,
        data: Dict[str, Any],
        params: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """Make async POST request with *data* sent as the JSON body."""
        return await self._connector._request(
            "POST", endpoint, json=data, params=params
        )

    async def _put(
        self,
        endpoint: str,
        data: Dict[str, Any],
        params: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """Make async PUT request with *data* sent as the JSON body.

        ``params`` is optional so this helper has the same shape as
        ``_post``; existing two-argument calls keep working.
        """
        return await self._connector._request(
            "PUT", endpoint, json=data, params=params
        )

    async def _patch(
        self,
        endpoint: str,
        data: Dict[str, Any],
        params: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """Make async PATCH request with *data* sent as the JSON body.

        ``params`` is optional so this helper has the same shape as
        ``_post``; existing two-argument calls keep working.
        """
        return await self._connector._request(
            "PATCH", endpoint, json=data, params=params
        )

    async def _delete(
        self,
        endpoint: str,
        params: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """Make async DELETE request, optionally with query parameters."""
        return await self._connector._request("DELETE", endpoint, params=params)
125
+
126
+
127
class AsyncAssetAPI(AsyncBaseAPI):
    """Async Asset API with typed returns."""

    async def get_asset(self, asset_id: str) -> AssetModel:
        """
        Get an asset by ID.

        Args:
            asset_id: The UUID of the asset.

        Returns:
            AssetModel parsed from the ``/assets/{asset_id}`` response.

        Example:
            >>> asset = await conn.asset.get_asset("uuid")
            >>> print(asset.name)
            >>> print(asset.status.name)
        """
        data = await self._get(f"/assets/{asset_id}")
        return parse_asset(data)

    async def find_assets(
        self,
        community_id: Optional[str] = None,
        domain_id: Optional[str] = None,
        asset_type_ids: Optional[List[str]] = None,
        status_ids: Optional[List[str]] = None,
        name: Optional[str] = None,
        name_match_mode: str = "ANYWHERE",
        limit: int = 100,
        offset: int = 0
    ) -> AssetList:
        """
        Find assets with filters.

        Args:
            community_id: Filter by community.
            domain_id: Filter by domain.
            asset_type_ids: Filter by asset type IDs.
            status_ids: Filter by status IDs.
            name: Filter by name.
            name_match_mode: How to match name (ANYWHERE, START, END, EXACT).
                Only sent when ``name`` is given.
            limit: Max results per page.
            offset: Offset for pagination.

        Returns:
            AssetList with paginated results.
        """
        params: Dict[str, Any] = {"limit": limit, "offset": offset}
        if community_id:
            params["communityId"] = community_id
        if domain_id:
            params["domainId"] = domain_id
        if asset_type_ids:
            params["typeIds"] = asset_type_ids
        if status_ids:
            params["statusIds"] = status_ids
        if name:
            params["name"] = name
            params["nameMatchMode"] = name_match_mode

        data = await self._get("/assets", params=params)
        return parse_assets(data)

    async def get_assets_batch(
        self,
        asset_ids: List[str],
        max_concurrent: int = 50
    ) -> List[AssetModel]:
        """
        Fetch multiple assets in parallel.

        Fetching is best-effort: an asset whose request fails is logged at
        WARNING level and left out, so the returned list can be shorter
        than ``asset_ids``.

        Args:
            asset_ids: List of asset UUIDs to fetch.
            max_concurrent: Maximum concurrent requests (must be >= 1).

        Returns:
            List of AssetModel objects for the assets that were fetched.

        Raises:
            ValueError: If ``max_concurrent`` is smaller than 1.

        Example:
            >>> assets = await conn.asset.get_assets_batch(["id1", "id2", ...])
            >>> for asset in assets:
            ...     print(f"{asset.name}: {asset.status.name}")
        """
        if max_concurrent < 1:
            # Semaphore(0) would block every task forever.
            raise ValueError("max_concurrent must be at least 1")

        log = logging.getLogger(__name__)
        semaphore = asyncio.Semaphore(max_concurrent)

        async def fetch_one(asset_id: str) -> Optional[AssetModel]:
            async with semaphore:
                try:
                    return await self.get_asset(asset_id)
                except Exception as exc:
                    log.warning("Failed to fetch asset %s: %s", asset_id, exc)
                    return None

        results = await asyncio.gather(*[fetch_one(aid) for aid in asset_ids])
        return [r for r in results if r is not None]

    async def add_asset(
        self,
        name: str,
        domain_id: str,
        type_id: Optional[str] = None,
        status_id: Optional[str] = None,
        display_name: Optional[str] = None,
        excluded_from_auto_hyperlinking: bool = False
    ) -> AssetModel:
        """Create a new asset and return it parsed as an AssetModel.

        ``type_id``, ``status_id`` and ``display_name`` are only included in
        the request body when they are truthy.
        """
        data: Dict[str, Any] = {
            "name": name,
            "domainId": domain_id,
            "excludedFromAutoHyperlinking": excluded_from_auto_hyperlinking
        }
        if type_id:
            data["typeId"] = type_id
        if status_id:
            data["statusId"] = status_id
        if display_name:
            data["displayName"] = display_name

        result = await self._post("/assets", data)
        return parse_asset(result)

    async def add_assets_batch(
        self,
        assets: List[Dict[str, Any]],
        max_concurrent: int = 20
    ) -> List[AssetModel]:
        """
        Create multiple assets in parallel.

        Creation is best-effort: an asset that cannot be created is logged
        at WARNING level and left out of the result.

        Args:
            assets: List of asset data dicts; each dict is passed to
                ``add_asset`` as keyword arguments (name, domain_id,
                type_id, status_id, display_name, ...).
            max_concurrent: Maximum concurrent requests (must be >= 1).

        Returns:
            List of created AssetModel objects.

        Raises:
            ValueError: If ``max_concurrent`` is smaller than 1.
        """
        if max_concurrent < 1:
            # Semaphore(0) would block every task forever.
            raise ValueError("max_concurrent must be at least 1")

        log = logging.getLogger(__name__)
        semaphore = asyncio.Semaphore(max_concurrent)

        async def create_one(asset_data: Dict[str, Any]) -> Optional[AssetModel]:
            async with semaphore:
                try:
                    return await self.add_asset(**asset_data)
                except Exception as exc:
                    log.warning(
                        "Failed to create asset %r: %s",
                        asset_data.get("name"),
                        exc,
                    )
                    return None

        results = await asyncio.gather(*[create_one(a) for a in assets])
        return [r for r in results if r is not None]

    async def change_asset(
        self,
        asset_id: str,
        name: Optional[str] = None,
        display_name: Optional[str] = None,
        status_id: Optional[str] = None,
        domain_id: Optional[str] = None
    ) -> AssetModel:
        """Update an asset via PATCH; only truthy fields are sent."""
        data: Dict[str, Any] = {"id": asset_id}
        if name:
            data["name"] = name
        if display_name:
            data["displayName"] = display_name
        if status_id:
            data["statusId"] = status_id
        if domain_id:
            data["domainId"] = domain_id

        result = await self._patch(f"/assets/{asset_id}", data)
        return parse_asset(result)

    async def remove_asset(self, asset_id: str) -> None:
        """Delete an asset."""
        await self._delete(f"/assets/{asset_id}")

    async def get_full_profile(
        self,
        asset_id: str,
        include_attributes: bool = True,
        include_relations: bool = True,
        include_responsibilities: bool = True
    ) -> AssetProfileModel:
        """
        Get complete asset profile with all related data in parallel.

        The asset itself is mandatory: if fetching it fails, that exception
        is re-raised. Attributes, relations and responsibilities are
        optional sections: a failure there is logged at WARNING level and
        the section is returned empty.

        Args:
            asset_id: The UUID of the asset.
            include_attributes: Include attributes.
            include_relations: Include relations.
            include_responsibilities: Include responsibilities.

        Returns:
            AssetProfileModel with all data.
        """
        log = logging.getLogger(__name__)

        # Prepare the coroutines; dict order is kept so results can be
        # zipped back onto their keys after gather().
        tasks: Dict[str, Any] = {"asset": self.get_asset(asset_id)}
        if include_attributes:
            tasks["attributes"] = (
                self._connector.attribute.get_attributes_as_dict(asset_id)
            )
        if include_relations:
            tasks["relations"] = (
                self._connector.relation.get_asset_relations(asset_id)
            )
        if include_responsibilities:
            tasks["responsibilities"] = (
                self._connector.responsibility.get_asset_responsibilities(asset_id)
            )

        # Execute all in parallel
        results = await asyncio.gather(*tasks.values(), return_exceptions=True)
        result_dict = dict(zip(tasks, results))

        asset = result_dict["asset"]
        if isinstance(asset, Exception):
            raise asset

        def section(key: str, fallback: Any) -> Any:
            """Return the fetched section, or *fallback* if absent/failed."""
            value = result_dict.get(key, fallback)
            if isinstance(value, Exception):
                log.warning(
                    "Could not load %s for asset %s: %s", key, asset_id, value
                )
                return fallback
            return value

        attributes = section("attributes", {})
        relations = section("relations", {})
        responsibilities = section("responsibilities", [])

        # Convert relations to model
        relations_grouped = RelationsGrouped(
            outgoing={
                k: [RelationSummary(**r) for r in v]
                for k, v in relations.get("outgoing", {}).items()
            },
            incoming={
                k: [RelationSummary(**r) for r in v]
                for k, v in relations.get("incoming", {}).items()
            },
            outgoing_count=relations.get("outgoing_count", 0),
            incoming_count=relations.get("incoming_count", 0)
        )

        # Convert responsibilities
        resp_summaries = [ResponsibilitySummary(**r) for r in responsibilities]

        return AssetProfileModel(
            asset=asset,
            attributes=attributes,
            relations=relations_grouped,
            responsibilities=resp_summaries
        )
379
+
380
+
381
class AsyncAttributeAPI(AsyncBaseAPI):
    """Async Attribute API."""

    async def get_attributes(
        self,
        asset_id: str,
        type_ids: Optional[List[str]] = None,
        limit: int = 100,
        offset: int = 0
    ) -> AttributeList:
        """Get one page of attributes for an asset.

        Args:
            asset_id: The asset whose attributes are requested.
            type_ids: Optional attribute type IDs to filter on.
            limit: Max results per page.
            offset: Offset for pagination.
        """
        params: Dict[str, Any] = {
            "assetId": asset_id,
            "limit": limit,
            "offset": offset
        }
        if type_ids:
            params["typeIds"] = type_ids

        data = await self._get("/attributes", params=params)
        return parse_attributes(data)

    async def get_attributes_as_dict(
        self,
        asset_id: str,
        limit: int = 500
    ) -> Dict[str, Any]:
        """Get attributes as a simple name->value dict.

        Only a single page of at most ``limit`` attributes is read. When
        several attributes share a type name, the last one in the page
        wins because the dict is keyed by ``type_name``.

        Args:
            asset_id: The asset whose attributes are requested.
            limit: Page size used for the single lookup (default 500).
        """
        result = await self.get_attributes(asset_id, limit=limit)
        return {
            attr.type_name: attr.value
            for attr in result.results
        }

    async def add_attribute(
        self,
        asset_id: str,
        type_id: str,
        value: Any
    ) -> AttributeModel:
        """Add an attribute to an asset."""
        data = {
            "assetId": asset_id,
            "typeId": type_id,
            "value": value
        }
        result = await self._post("/attributes", data)
        return parse_attribute(result)

    async def add_attributes_batch(
        self,
        attributes: List[Dict[str, Any]],
        max_concurrent: int = 30
    ) -> List[AttributeModel]:
        """Add multiple attributes in parallel.

        Each dict is passed to ``add_attribute`` as keyword arguments.
        Creation is best-effort: a failed attribute is logged at WARNING
        level and left out of the result.

        Raises:
            ValueError: If ``max_concurrent`` is smaller than 1.
        """
        if max_concurrent < 1:
            # Semaphore(0) would block every task forever.
            raise ValueError("max_concurrent must be at least 1")

        log = logging.getLogger(__name__)
        semaphore = asyncio.Semaphore(max_concurrent)

        async def add_one(attr: Dict[str, Any]) -> Optional[AttributeModel]:
            async with semaphore:
                try:
                    return await self.add_attribute(**attr)
                except Exception as exc:
                    log.warning(
                        "Failed to add attribute (asset_id=%r, type_id=%r): %s",
                        attr.get("asset_id"),
                        attr.get("type_id"),
                        exc,
                    )
                    return None

        results = await asyncio.gather(*[add_one(a) for a in attributes])
        return [r for r in results if r is not None]
443
+
444
+
445
class AsyncDomainAPI(AsyncBaseAPI):
    """Asynchronous access to the domain endpoints."""

    async def get_domain(self, domain_id: str) -> DomainModel:
        """Fetch a single domain by ID and parse it into a DomainModel."""
        return parse_domain(await self._get(f"/domains/{domain_id}"))

    async def find_domains(
        self,
        community_id: Optional[str] = None,
        name: Optional[str] = None,
        limit: int = 100,
        offset: int = 0
    ) -> DomainList:
        """List domains, optionally narrowed by community and/or name.

        Filters that are empty or ``None`` are not sent at all.
        """
        query: Dict[str, Any] = {"limit": limit, "offset": offset}
        optional_filters = (
            ("communityId", community_id),
            ("name", name),
        )
        for key, value in optional_filters:
            if value:
                query[key] = value

        return parse_domains(await self._get("/domains", params=query))
469
+
470
+
471
class AsyncCommunityAPI(AsyncBaseAPI):
    """Asynchronous access to the community endpoints."""

    async def get_community(self, community_id: str) -> CommunityModel:
        """Fetch a single community by ID and parse it into a CommunityModel."""
        return parse_community(await self._get(f"/communities/{community_id}"))

    async def find_communities(
        self,
        name: Optional[str] = None,
        parent_id: Optional[str] = None,
        limit: int = 100,
        offset: int = 0
    ) -> CommunityList:
        """List communities, optionally narrowed by name and/or parent.

        Filters that are empty or ``None`` are not sent at all.
        """
        query: Dict[str, Any] = {"limit": limit, "offset": offset}
        optional_filters = (
            ("name", name),
            ("parentId", parent_id),
        )
        for key, value in optional_filters:
            if value:
                query[key] = value

        return parse_communities(await self._get("/communities", params=query))
495
+
496
+
497
class AsyncRelationAPI(AsyncBaseAPI):
    """Async Relation API."""

    async def get_relation(self, relation_id: str) -> RelationModel:
        """Get a relation by ID."""
        data = await self._get(f"/relations/{relation_id}")
        return parse_relation(data)

    async def find_relations(
        self,
        source_id: Optional[str] = None,
        target_id: Optional[str] = None,
        type_id: Optional[str] = None,
        limit: int = 100,
        offset: int = 0
    ) -> RelationList:
        """Find relations with filters; empty filters are not sent."""
        params: Dict[str, Any] = {"limit": limit, "offset": offset}
        if source_id:
            params["sourceId"] = source_id
        if target_id:
            params["targetId"] = target_id
        if type_id:
            params["typeId"] = type_id

        data = await self._get("/relations", params=params)
        return parse_relations(data)

    async def add_relation(
        self,
        source_id: str,
        target_id: str,
        type_id: str
    ) -> RelationModel:
        """Create a new relation."""
        data = {
            "sourceId": source_id,
            "targetId": target_id,
            "typeId": type_id
        }
        result = await self._post("/relations", data)
        return parse_relation(result)

    async def add_relations_batch(
        self,
        relations: List[Dict[str, str]],
        max_concurrent: int = 30
    ) -> List[RelationModel]:
        """Create multiple relations in parallel.

        Each dict is passed to ``add_relation`` as keyword arguments.
        Creation is best-effort: a failed relation is logged at WARNING
        level and left out of the result.

        Raises:
            ValueError: If ``max_concurrent`` is smaller than 1.
        """
        if max_concurrent < 1:
            # Semaphore(0) would block every task forever.
            raise ValueError("max_concurrent must be at least 1")

        log = logging.getLogger(__name__)
        semaphore = asyncio.Semaphore(max_concurrent)

        async def add_one(rel: Dict[str, str]) -> Optional[RelationModel]:
            async with semaphore:
                try:
                    return await self.add_relation(**rel)
                except Exception as exc:
                    log.warning("Failed to add relation %r: %s", rel, exc)
                    return None

        results = await asyncio.gather(*[add_one(r) for r in relations])
        return [r for r in results if r is not None]

    async def get_asset_relations(
        self,
        asset_id: str,
        direction: str = "BOTH",
        limit: int = 500
    ) -> Dict[str, Any]:
        """Get relations for an asset, grouped by direction and type.

        Args:
            asset_id: The asset whose relations are requested.
            direction: "BOTH", "OUTGOING" or "INCOMING" (case-insensitive).
                Any other value yields an empty result and a warning.
            limit: Page size of the single lookup made per direction.

        Returns:
            Dict with ``outgoing`` and ``incoming`` (type name -> list of
            summary dicts) plus ``outgoing_count`` and ``incoming_count``,
            which are taken from the ``total`` field of each lookup. A
            direction whose lookup fails is logged at WARNING level and
            left empty.
        """
        log = logging.getLogger(__name__)
        result: Dict[str, Any] = {
            "outgoing": {},
            "incoming": {},
            "outgoing_count": 0,
            "incoming_count": 0
        }

        wanted = direction.upper()
        lookups: List[Any] = []
        if wanted in ("BOTH", "OUTGOING"):
            lookups.append(
                ("outgoing", self.find_relations(source_id=asset_id, limit=limit))
            )
        if wanted in ("BOTH", "INCOMING"):
            lookups.append(
                ("incoming", self.find_relations(target_id=asset_id, limit=limit))
            )
        if not lookups:
            log.warning(
                "Unknown relation direction %r; expected BOTH, OUTGOING or INCOMING",
                direction,
            )
            return result

        pages = await asyncio.gather(
            *[coro for _, coro in lookups], return_exceptions=True
        )

        for (direction_key, _), page in zip(lookups, pages):
            if isinstance(page, Exception):
                log.warning(
                    "Could not load %s relations for asset %s: %s",
                    direction_key, asset_id, page,
                )
                continue

            result[f"{direction_key}_count"] = page.total

            # Outgoing entries describe the target; incoming entries
            # describe the source.
            other_end = "target" if direction_key == "outgoing" else "source"
            grouped = result[direction_key]
            for rel in page.results:
                other = getattr(rel, other_end)
                grouped.setdefault(rel.type_name, []).append({
                    "id": rel.id,
                    f"{other_end}_id": other.id,
                    f"{other_end}_name": other.name
                })

        return result
605
+
606
+
607
class AsyncResponsibilityAPI(AsyncBaseAPI):
    """Async Responsibility API."""

    async def get_asset_responsibilities(
        self,
        asset_id: str,
        limit: int = 50
    ) -> List[Dict[str, Any]]:
        """Get responsibilities for an asset.

        Args:
            asset_id: Sent as the ``resourceIds`` query parameter.
            limit: Max number of responsibilities requested.

        Returns:
            One dict per responsibility with the keys ``role``, ``owner``
            (first and last name joined, else the owner's ``name``, else
            "Unknown") and ``owner_id``.
        """
        params = {"resourceIds": asset_id, "limit": limit}
        data = await self._get("/responsibilities", params=params)

        responsibilities = []
        # ``or`` fallbacks guard against JSON nulls: dict.get() only applies
        # its default when the key is missing, not when the value is None.
        for resp in data.get("results") or []:
            role = (resp.get("role") or {}).get("name") or "Unknown"
            owner = resp.get("owner") or {}

            first_name = owner.get("firstName") or ""
            last_name = owner.get("lastName") or ""
            owner_name = f"{first_name} {last_name}".strip()
            if not owner_name:
                owner_name = owner.get("name") or "Unknown"

            responsibilities.append({
                "role": role,
                "owner": owner_name,
                "owner_id": owner.get("id")
            })

        return responsibilities
634
+
635
+
636
class AsyncSearchAPI(AsyncBaseAPI):
    """Asynchronous wrapper around the ``/search`` endpoint."""

    async def find(
        self,
        query: str,
        limit: int = 10,
        offset: int = 0,
        category: Optional[str] = None,
        type_ids: Optional[List[str]] = None,
        domain_ids: Optional[List[str]] = None,
        community_ids: Optional[List[str]] = None
    ) -> SearchResults:
        """
        Run a search and return the parsed results.

        Args:
            query: Search keywords (supports wildcards).
            limit: Max results.
            offset: Pagination offset.
            category: Filter by category (ASSET, DOMAIN, etc.).
            type_ids: Filter by type IDs.
            domain_ids: Filter by domain IDs.
            community_ids: Filter by community IDs.

        Returns:
            SearchResults with typed results.
        """
        payload: Dict[str, Any] = {
            "keywords": query,
            "limit": limit,
            "offset": offset
        }

        # Optional filters are added only when they carry a value.
        optional_fields = (
            ("category", category),
            ("typeIds", type_ids),
            ("domainIds", domain_ids),
            ("communityIds", community_ids),
        )
        for field_name, value in optional_fields:
            if value:
                payload[field_name] = value

        return parse_search_results(await self._post("/search", payload))

    async def find_assets(
        self,
        query: str,
        limit: int = 10,
        offset: int = 0,
        type_ids: Optional[List[str]] = None,
        domain_ids: Optional[List[str]] = None
    ) -> SearchResults:
        """Search with the category fixed to ``"ASSET"``."""
        return await self.find(
            query, limit, offset, "ASSET", type_ids, domain_ids
        )
699
+
700
+
701
class AsyncCollibraConnector:
    """
    Asynchronous Collibra Connector using httpx.

    Requests are issued through a shared ``httpx.AsyncClient`` that is
    created on entering the async context manager and closed on exit, so
    the connector must be used with ``async with``.

    Example:
        >>> async with AsyncCollibraConnector(
        ...     api="https://your-instance.collibra.com",
        ...     username="user",
        ...     password="pass"
        ... ) as conn:
        ...     # Fetch many assets in parallel
        ...     assets = await conn.asset.get_assets_batch(asset_ids)
        ...
        ...     # Create many relations in parallel
        ...     relations = await conn.relation.add_relations_batch(relation_data)
    """

    DEFAULT_TIMEOUT: float = 30.0
    DEFAULT_MAX_RETRIES: int = 3
    DEFAULT_RETRY_DELAY: float = 1.0
    # Status codes that are retried; any other 5xx is retried as well.
    RETRYABLE_STATUS_CODES: tuple = (429, 500, 502, 503, 504)

    def __init__(
        self,
        api: Optional[str] = None,
        username: Optional[str] = None,
        password: Optional[str] = None,
        timeout: float = DEFAULT_TIMEOUT,
        max_retries: int = DEFAULT_MAX_RETRIES,
        retry_delay: float = DEFAULT_RETRY_DELAY,
        max_connections: int = 100
    ) -> None:
        """
        Initialize the async connector.

        Args:
            api: Base URL for Collibra (or COLLIBRA_URL env var).
            username: Username (or COLLIBRA_USERNAME env var).
            password: Password (or COLLIBRA_PASSWORD env var).
            timeout: Request timeout in seconds.
            max_retries: Total number of attempts per request; values
                below 1 are treated as 1.
            retry_delay: Base delay between retries; it is doubled after
                each failed attempt.
            max_connections: Maximum concurrent connections.

        Raises:
            ImportError: If httpx is not installed.
            ValueError: If URL, username or password cannot be resolved.
        """
        if not HTTPX_AVAILABLE:
            raise ImportError(
                "httpx is required for async operations. "
                "Install it with: pip install httpx"
            )

        # Load from env vars if not provided
        api = api or os.environ.get("COLLIBRA_URL")
        username = username or os.environ.get("COLLIBRA_USERNAME")
        password = password or os.environ.get("COLLIBRA_PASSWORD")

        if not api:
            raise ValueError("API URL required (arg or COLLIBRA_URL env var)")
        if not username:
            raise ValueError("Username required (arg or COLLIBRA_USERNAME env var)")
        if not password:
            raise ValueError("Password required (arg or COLLIBRA_PASSWORD env var)")

        self._api = api.rstrip("/") + "/rest/2.0"
        self._base_url = api.rstrip("/")
        self._auth = (username, password)
        self._timeout = timeout
        self._max_retries = max_retries
        self._retry_delay = retry_delay
        self._max_connections = max_connections

        self._client: Optional[httpx.AsyncClient] = None
        self.logger = logging.getLogger(__name__)

        # Initialize API modules
        self.asset = AsyncAssetAPI(self)
        self.attribute = AsyncAttributeAPI(self)
        self.domain = AsyncDomainAPI(self)
        self.community = AsyncCommunityAPI(self)
        self.relation = AsyncRelationAPI(self)
        self.responsibility = AsyncResponsibilityAPI(self)
        self.search = AsyncSearchAPI(self)

    @property
    def api(self) -> str:
        """Get the full API URL (base URL plus ``/rest/2.0``)."""
        return self._api

    async def __aenter__(self) -> "AsyncCollibraConnector":
        """Enter async context manager and create the HTTP client."""
        limits = httpx.Limits(
            max_connections=self._max_connections,
            max_keepalive_connections=self._max_connections // 2
        )
        self._client = httpx.AsyncClient(
            auth=self._auth,
            timeout=self._timeout,
            limits=limits,
            headers={
                "Content-Type": "application/json",
                "Accept": "application/json"
            }
        )
        return self

    async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """Exit async context manager and close the HTTP client."""
        if self._client:
            await self._client.aclose()
            self._client = None

    async def _backoff(self, attempt: int, reason: str) -> None:
        """Log *reason* and sleep ``retry_delay * 2**attempt`` seconds."""
        delay = self._retry_delay * (2 ** attempt)
        self.logger.warning("%s, retrying in %.1fs", reason, delay)
        await asyncio.sleep(delay)

    async def _request(
        self,
        method: str,
        endpoint: str,
        **kwargs: Any
    ) -> Dict[str, Any]:
        """
        Make an HTTP request with automatic retry logic.

        Connection errors, timeouts, 429 and 5xx responses are retried
        with exponential backoff until the attempts are used up. At least
        one attempt is always made.

        Args:
            method: HTTP method (GET, POST, PUT, PATCH, DELETE).
            endpoint: API endpoint (e.g., "/assets/{id}").
            **kwargs: Additional httpx request arguments.

        Returns:
            Response JSON for any 2xx response with a body, otherwise an
            empty dict.

        Raises:
            RuntimeError: If the client has not been created yet.
            UnauthorizedError: On 401.
            ForbiddenError: On 403.
            NotFoundError: On 404.
            ServerError: On 5xx after the last attempt.
            Exception: On 429 after the last attempt, or on any other
                unexpected status code.
        """
        if not self._client:
            raise RuntimeError(
                "Client not initialized. Use 'async with' context manager."
            )

        url = f"{self._api}{endpoint}"
        # Guarantee one attempt even when max_retries is 0 or negative;
        # otherwise the loop would never send a request.
        attempts = max(1, self._max_retries)

        for attempt in range(attempts):
            is_last = attempt == attempts - 1

            # Only the network call sits inside the try, so the status
            # handling below cannot be mistaken for a transport failure.
            try:
                response = await self._client.request(method, url, **kwargs)
            except (httpx.ConnectError, httpx.TimeoutException) as exc:
                if is_last:
                    raise
                await self._backoff(attempt, type(exc).__name__)
                continue

            status = response.status_code

            if 200 <= status < 300:
                # 204 has no body; other success codes may be empty too.
                if status != 204 and response.text.strip():
                    return response.json()
                return {}

            if status == 401:
                raise UnauthorizedError(f"Unauthorized: {response.text}")
            if status == 403:
                raise ForbiddenError(f"Forbidden: {response.text}")
            if status == 404:
                raise NotFoundError(f"Not found: {response.text}")

            retryable = status in self.RETRYABLE_STATUS_CODES or status >= 500
            if retryable and not is_last:
                if status == 429:
                    reason = "Rate limited"
                elif status >= 500:
                    reason = f"Server error {status}"
                else:
                    reason = f"HTTP {status}"
                await self._backoff(attempt, reason)
                continue

            if status == 429:
                raise Exception(f"Rate limited: {response.text}")
            if status >= 500:
                raise ServerError(f"Server error: {response.text}")
            raise Exception(
                f"Unexpected status {status}: {response.text}"
            )

        # Defensive: every loop iteration returns, raises or continues, and
        # the last iteration never continues.
        raise Exception("Request failed after all retries")

    async def test_connection(self) -> bool:
        """Test the connection to Collibra.

        Returns:
            True if ``GET /auth/sessions/current`` succeeds, False (after
            logging the error) on any exception.
        """
        try:
            await self._request("GET", "/auth/sessions/current")
            return True
        except Exception as e:
            self.logger.error(f"Connection test failed: {e}")
            return False

    async def gather_with_concurrency(
        self,
        coros: List[Any],
        max_concurrent: int = 50
    ) -> List[Any]:
        """
        Execute coroutines with limited concurrency.

        Useful for rate-limiting bulk operations.

        Args:
            coros: List of coroutines to execute.
            max_concurrent: Maximum concurrent executions (must be >= 1).

        Returns:
            List of results, in the same order as ``coros``.

        Raises:
            ValueError: If ``max_concurrent`` is smaller than 1.
        """
        if max_concurrent < 1:
            # Semaphore(0) would block every coroutine forever.
            raise ValueError("max_concurrent must be at least 1")

        semaphore = asyncio.Semaphore(max_concurrent)

        async def limited(coro: Any) -> Any:
            async with semaphore:
                return await coro

        return await asyncio.gather(*[limited(c) for c in coros])