collibra-connector 1.0.19__py3-none-any.whl → 1.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- collibra_connector/__init__.py +284 -4
- collibra_connector/api/Asset.py +301 -3
- collibra_connector/api/Attribute.py +204 -0
- collibra_connector/api/Base.py +2 -2
- collibra_connector/api/Relation.py +216 -0
- collibra_connector/api/Responsibility.py +5 -5
- collibra_connector/api/Search.py +102 -0
- collibra_connector/api/Workflow.py +50 -16
- collibra_connector/api/__init__.py +23 -13
- collibra_connector/async_connector.py +930 -0
- collibra_connector/cli.py +597 -0
- collibra_connector/connector.py +270 -48
- collibra_connector/helpers.py +845 -0
- collibra_connector/lineage.py +716 -0
- collibra_connector/models.py +897 -0
- collibra_connector/py.typed +0 -0
- collibra_connector/telemetry.py +576 -0
- collibra_connector/testing.py +806 -0
- collibra_connector-1.1.1.dist-info/METADATA +540 -0
- collibra_connector-1.1.1.dist-info/RECORD +32 -0
- {collibra_connector-1.0.19.dist-info → collibra_connector-1.1.1.dist-info}/WHEEL +1 -1
- collibra_connector-1.1.1.dist-info/entry_points.txt +2 -0
- collibra_connector-1.0.19.dist-info/METADATA +0 -157
- collibra_connector-1.0.19.dist-info/RECORD +0 -21
- {collibra_connector-1.0.19.dist-info → collibra_connector-1.1.1.dist-info}/licenses/LICENSE +0 -0
- {collibra_connector-1.0.19.dist-info → collibra_connector-1.1.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,930 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Async Collibra Connector - Asynchronous API client using httpx.
|
|
3
|
+
|
|
4
|
+
This module provides an asynchronous version of CollibraConnector,
|
|
5
|
+
enabling parallel requests for massive performance improvements
|
|
6
|
+
in batch operations and migrations.
|
|
7
|
+
|
|
8
|
+
Example:
|
|
9
|
+
>>> import asyncio
|
|
10
|
+
>>> from collibra_connector import AsyncCollibraConnector
|
|
11
|
+
>>>
|
|
12
|
+
>>> async def main():
|
|
13
|
+
... async with AsyncCollibraConnector(
|
|
14
|
+
... api="https://your-instance.collibra.com",
|
|
15
|
+
... username="user",
|
|
16
|
+
... password="pass"
|
|
17
|
+
... ) as conn:
|
|
18
|
+
... # Fetch 100 assets in parallel
|
|
19
|
+
... asset_ids = ["uuid1", "uuid2", ..., "uuid100"]
|
|
20
|
+
... assets = await conn.asset.get_assets_batch(asset_ids)
|
|
21
|
+
... print(f"Fetched {len(assets)} assets")
|
|
22
|
+
>>>
|
|
23
|
+
>>> asyncio.run(main())
|
|
24
|
+
"""
|
|
25
|
+
from __future__ import annotations
|
|
26
|
+
|
|
27
|
+
import asyncio
|
|
28
|
+
import logging
|
|
29
|
+
import os
|
|
30
|
+
from typing import Any, Dict, List, Optional, TypeVar, Union, TYPE_CHECKING
|
|
31
|
+
|
|
32
|
+
try:
|
|
33
|
+
import httpx
|
|
34
|
+
HTTPX_AVAILABLE = True
|
|
35
|
+
except ImportError:
|
|
36
|
+
HTTPX_AVAILABLE = False
|
|
37
|
+
|
|
38
|
+
from .models import (
|
|
39
|
+
AssetModel,
|
|
40
|
+
AssetList,
|
|
41
|
+
DomainModel,
|
|
42
|
+
DomainList,
|
|
43
|
+
CommunityModel,
|
|
44
|
+
CommunityList,
|
|
45
|
+
UserModel,
|
|
46
|
+
UserList,
|
|
47
|
+
AttributeModel,
|
|
48
|
+
AttributeList,
|
|
49
|
+
RelationModel,
|
|
50
|
+
RelationList,
|
|
51
|
+
SearchResults,
|
|
52
|
+
SearchResultModel,
|
|
53
|
+
AssetProfileModel,
|
|
54
|
+
RelationsGrouped,
|
|
55
|
+
RelationSummary,
|
|
56
|
+
ResponsibilitySummary,
|
|
57
|
+
parse_asset,
|
|
58
|
+
parse_assets,
|
|
59
|
+
parse_domain,
|
|
60
|
+
parse_domains,
|
|
61
|
+
parse_community,
|
|
62
|
+
parse_communities,
|
|
63
|
+
parse_user,
|
|
64
|
+
parse_users,
|
|
65
|
+
parse_attribute,
|
|
66
|
+
parse_attributes,
|
|
67
|
+
parse_relation,
|
|
68
|
+
parse_relations,
|
|
69
|
+
parse_search_results,
|
|
70
|
+
)
|
|
71
|
+
from .api.Exceptions import (
|
|
72
|
+
UnauthorizedError,
|
|
73
|
+
ForbiddenError,
|
|
74
|
+
NotFoundError,
|
|
75
|
+
ServerError,
|
|
76
|
+
)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
T = TypeVar('T')
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class AsyncBaseAPI:
    """Shared plumbing for the async API modules.

    Every helper below forwards to the owning connector's ``_request``
    coroutine and returns whatever that coroutine returns.
    """

    def __init__(self, connector: "AsyncCollibraConnector") -> None:
        """Remember the connector and the API root it exposes via ``api``."""
        self._connector = connector
        self._base_url = connector.api

    async def _get(
        self,
        endpoint: str,
        params: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """Send a GET to ``endpoint``, passing ``params`` through as-is."""
        send = self._connector._request
        body = await send("GET", endpoint, params=params)
        return body

    async def _post(
        self,
        endpoint: str,
        data: Dict[str, Any],
        params: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """Send a POST with ``data`` as the JSON body and optional ``params``."""
        send = self._connector._request
        body = await send("POST", endpoint, json=data, params=params)
        return body

    async def _put(
        self,
        endpoint: str,
        data: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Send a PUT with ``data`` as the JSON body."""
        send = self._connector._request
        body = await send("PUT", endpoint, json=data)
        return body

    async def _patch(
        self,
        endpoint: str,
        data: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Send a PATCH with ``data`` as the JSON body."""
        send = self._connector._request
        body = await send("PATCH", endpoint, json=data)
        return body

    async def _delete(self, endpoint: str) -> Dict[str, Any]:
        """Send a DELETE to ``endpoint``."""
        send = self._connector._request
        body = await send("DELETE", endpoint)
        return body
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
class AsyncAssetAPI(AsyncBaseAPI):
    """Async Asset API with typed returns."""

    async def get_asset(self, asset_id: str) -> AssetModel:
        """
        Get an asset by ID.

        Args:
            asset_id: The UUID of the asset.

        Returns:
            The ``/assets/{asset_id}`` response parsed by ``parse_asset``.

        Example:
            >>> asset = await conn.asset.get_asset("uuid")
            >>> print(asset.name)
            >>> print(asset.status.name)
        """
        data = await self._get(f"/assets/{asset_id}")
        return parse_asset(data)

    async def find_assets(
        self,
        community_id: Optional[str] = None,
        domain_id: Optional[str] = None,
        asset_type_ids: Optional[List[str]] = None,
        status_ids: Optional[List[str]] = None,
        name: Optional[str] = None,
        name_match_mode: str = "ANYWHERE",
        limit: int = 100,
        offset: int = 0
    ) -> AssetList:
        """
        Find assets with filters.

        Args:
            community_id: Filter by community.
            domain_id: Filter by domain.
            asset_type_ids: Filter by asset type IDs.
            status_ids: Filter by status IDs.
            name: Filter by name.
            name_match_mode: How to match name (ANYWHERE, START, END, EXACT).
                Only sent when ``name`` is given.
            limit: Max results per page.
            offset: Offset for pagination.

        Returns:
            The ``/assets`` response parsed by ``parse_assets``.
        """
        params: Dict[str, Any] = {"limit": limit, "offset": offset}
        if community_id:
            params["communityId"] = community_id
        if domain_id:
            params["domainId"] = domain_id
        if asset_type_ids:
            params["typeIds"] = asset_type_ids
        if status_ids:
            params["statusIds"] = status_ids
        if name:
            params["name"] = name
            params["nameMatchMode"] = name_match_mode

        data = await self._get("/assets", params=params)
        return parse_assets(data)

    async def get_assets_batch(
        self,
        asset_ids: List[str],
        max_concurrent: int = 50
    ) -> List[AssetModel]:
        """
        Fetch multiple assets concurrently.

        This is best-effort: an asset whose request fails is logged at
        WARNING level and left out of the result, so the returned list may
        be shorter than ``asset_ids``.

        Args:
            asset_ids: List of asset UUIDs to fetch.
            max_concurrent: Maximum concurrent requests (must be >= 1).

        Returns:
            The successfully fetched assets, in input order.

        Raises:
            ValueError: If ``max_concurrent`` is less than 1.

        Example:
            >>> assets = await conn.asset.get_assets_batch(["id1", "id2", ...])
            >>> for asset in assets:
            ...     print(f"{asset.name}: {asset.status.name}")
        """
        # A semaphore created with 0 permits would block every task forever.
        if max_concurrent < 1:
            raise ValueError("max_concurrent must be at least 1")

        semaphore = asyncio.Semaphore(max_concurrent)
        log = logging.getLogger(__name__)

        async def fetch_one(asset_id: str) -> Optional[AssetModel]:
            async with semaphore:
                try:
                    return await self.get_asset(asset_id)
                except Exception as exc:
                    # Keep the batch going, but leave a trace of what was dropped.
                    log.warning(
                        "Skipping asset %s: %s: %s",
                        asset_id, type(exc).__name__, exc
                    )
                    return None

        results = await asyncio.gather(*(fetch_one(aid) for aid in asset_ids))
        return [r for r in results if r is not None]

    async def add_asset(
        self,
        name: str,
        domain_id: str,
        type_id: Optional[str] = None,
        status_id: Optional[str] = None,
        display_name: Optional[str] = None,
        excluded_from_auto_hyperlinking: bool = False
    ) -> AssetModel:
        """
        Create a new asset.

        ``type_id``, ``status_id`` and ``display_name`` are only included in
        the request body when they are truthy.

        Returns:
            The POST ``/assets`` response parsed by ``parse_asset``.
        """
        data: Dict[str, Any] = {
            "name": name,
            "domainId": domain_id,
            "excludedFromAutoHyperlinking": excluded_from_auto_hyperlinking
        }
        if type_id:
            data["typeId"] = type_id
        if status_id:
            data["statusId"] = status_id
        if display_name:
            data["displayName"] = display_name

        result = await self._post("/assets", data)
        return parse_asset(result)

    async def add_assets_batch(
        self,
        assets: List[Dict[str, Any]],
        max_concurrent: int = 20
    ) -> List[AssetModel]:
        """
        Create multiple assets concurrently.

        This is best-effort: an asset whose creation fails is logged at
        WARNING level and left out of the result.

        Args:
            assets: List of asset data dicts with keys:
                name, domain_id, type_id, status_id, display_name
                (each dict is passed to ``add_asset`` as keyword arguments).
            max_concurrent: Maximum concurrent requests (must be >= 1).

        Returns:
            The successfully created assets, in input order.

        Raises:
            ValueError: If ``max_concurrent`` is less than 1.
        """
        # A semaphore created with 0 permits would block every task forever.
        if max_concurrent < 1:
            raise ValueError("max_concurrent must be at least 1")

        semaphore = asyncio.Semaphore(max_concurrent)
        log = logging.getLogger(__name__)

        async def create_one(asset_data: Dict[str, Any]) -> Optional[AssetModel]:
            async with semaphore:
                try:
                    return await self.add_asset(**asset_data)
                except Exception as exc:
                    # Keep the batch going, but leave a trace of what was dropped.
                    log.warning(
                        "Failed to create asset %r: %s: %s",
                        asset_data, type(exc).__name__, exc
                    )
                    return None

        results = await asyncio.gather(*(create_one(a) for a in assets))
        return [r for r in results if r is not None]

    async def change_asset(
        self,
        asset_id: str,
        name: Optional[str] = None,
        display_name: Optional[str] = None,
        status_id: Optional[str] = None,
        domain_id: Optional[str] = None
    ) -> AssetModel:
        """
        Update an asset.

        Only truthy arguments are added to the PATCH body; the body always
        carries ``id``.

        Returns:
            The PATCH ``/assets/{asset_id}`` response parsed by ``parse_asset``.
        """
        data: Dict[str, Any] = {"id": asset_id}
        if name:
            data["name"] = name
        if display_name:
            data["displayName"] = display_name
        if status_id:
            data["statusId"] = status_id
        if domain_id:
            data["domainId"] = domain_id

        result = await self._patch(f"/assets/{asset_id}", data)
        return parse_asset(result)

    async def remove_asset(self, asset_id: str) -> None:
        """Delete an asset."""
        await self._delete(f"/assets/{asset_id}")

    async def get_full_profile(
        self,
        asset_id: str,
        include_attributes: bool = True,
        include_relations: bool = True,
        include_responsibilities: bool = True
    ) -> AssetProfileModel:
        """
        Get the asset together with its related data, fetched concurrently.

        The asset itself is mandatory: if that request fails, its exception
        is re-raised. Attributes, relations and responsibilities are
        best-effort: a failure is logged at WARNING level and replaced by an
        empty value.

        Args:
            asset_id: The UUID of the asset.
            include_attributes: Include attributes.
            include_relations: Include relations.
            include_responsibilities: Include responsibilities.

        Returns:
            AssetProfileModel built from the collected data.
        """
        # Coroutines are created here and started together by gather() below.
        tasks: Dict[str, Any] = {"asset": self.get_asset(asset_id)}

        if include_attributes:
            tasks["attributes"] = self._connector.attribute.get_attributes_as_dict(asset_id)

        if include_relations:
            tasks["relations"] = self._connector.relation.get_asset_relations(asset_id)

        if include_responsibilities:
            tasks["responsibilities"] = (
                self._connector.responsibility.get_asset_responsibilities(asset_id)
            )

        results = await asyncio.gather(*tasks.values(), return_exceptions=True)
        result_dict = dict(zip(tasks, results))

        asset = result_dict["asset"]
        # BaseException also covers a cancelled child, which must not be
        # handed to the model as if it were an asset.
        if isinstance(asset, BaseException):
            raise asset

        log = logging.getLogger(__name__)

        def usable(key: str, fallback: Any) -> Any:
            """Return the gathered value for ``key``, or ``fallback`` if absent or failed."""
            value = result_dict.get(key, fallback)
            if isinstance(value, Exception):
                log.warning(
                    "Could not load %s for asset %s: %s: %s",
                    key, asset_id, type(value).__name__, value
                )
                return fallback
            return value

        attributes = usable("attributes", {})
        relations = usable(
            "relations",
            {"outgoing": {}, "incoming": {}, "outgoing_count": 0, "incoming_count": 0}
        )
        responsibilities = usable("responsibilities", [])

        # Convert relations to model
        relations_grouped = RelationsGrouped(
            outgoing={
                k: [RelationSummary(**r) for r in v]
                for k, v in relations.get("outgoing", {}).items()
            },
            incoming={
                k: [RelationSummary(**r) for r in v]
                for k, v in relations.get("incoming", {}).items()
            },
            outgoing_count=relations.get("outgoing_count", 0),
            incoming_count=relations.get("incoming_count", 0)
        )

        # Convert responsibilities
        resp_summaries = [ResponsibilitySummary(**r) for r in responsibilities]

        return AssetProfileModel(
            asset=asset,
            attributes=attributes,
            relations=relations_grouped,
            responsibilities=resp_summaries
        )
|
|
379
|
+
|
|
380
|
+
|
|
381
|
+
class AsyncAttributeAPI(AsyncBaseAPI):
    """Async Attribute API."""

    async def get_attributes(
        self,
        asset_id: str,
        type_ids: Optional[List[str]] = None,
        limit: int = 100,
        offset: int = 0
    ) -> AttributeList:
        """
        Get attributes for an asset.

        Args:
            asset_id: The asset whose attributes are requested.
            type_ids: Optional attribute type IDs; only sent when non-empty.
            limit: Max results per page.
            offset: Offset for pagination.

        Returns:
            The ``/attributes`` response parsed by ``parse_attributes``.
        """
        params: Dict[str, Any] = {
            "assetId": asset_id,
            "limit": limit,
            "offset": offset
        }
        if type_ids:
            params["typeIds"] = type_ids

        data = await self._get("/attributes", params=params)
        return parse_attributes(data)

    async def get_attributes_as_dict(self, asset_id: str) -> Dict[str, Any]:
        """
        Get attributes as a simple name->value dict.

        A single page of up to 500 attributes is requested. When several
        attributes share a ``type_name``, the last one in the page wins.
        """
        result = await self.get_attributes(asset_id, limit=500)
        return {attr.type_name: attr.value for attr in result.results}

    async def add_attribute(
        self,
        asset_id: str,
        type_id: str,
        value: Any
    ) -> AttributeModel:
        """
        Add an attribute to an asset.

        Returns:
            The POST ``/attributes`` response parsed by ``parse_attribute``.
        """
        data = {
            "assetId": asset_id,
            "typeId": type_id,
            "value": value
        }
        result = await self._post("/attributes", data)
        return parse_attribute(result)

    async def add_attributes_batch(
        self,
        attributes: List[Dict[str, Any]],
        max_concurrent: int = 30
    ) -> List[AttributeModel]:
        """
        Add multiple attributes concurrently.

        This is best-effort: an attribute whose creation fails is logged at
        WARNING level and left out of the result.

        Args:
            attributes: Dicts passed to ``add_attribute`` as keyword
                arguments (``asset_id``, ``type_id``, ``value``).
            max_concurrent: Maximum concurrent requests (must be >= 1).

        Returns:
            The successfully created attributes, in input order.

        Raises:
            ValueError: If ``max_concurrent`` is less than 1.
        """
        # A semaphore created with 0 permits would block every task forever.
        if max_concurrent < 1:
            raise ValueError("max_concurrent must be at least 1")

        semaphore = asyncio.Semaphore(max_concurrent)
        log = logging.getLogger(__name__)

        async def add_one(attr: Dict[str, Any]) -> Optional[AttributeModel]:
            async with semaphore:
                try:
                    return await self.add_attribute(**attr)
                except Exception as exc:
                    # Keep the batch going, but leave a trace of what was dropped.
                    log.warning(
                        "Failed to add attribute %r: %s: %s",
                        attr, type(exc).__name__, exc
                    )
                    return None

        results = await asyncio.gather(*(add_one(a) for a in attributes))
        return [r for r in results if r is not None]
|
|
443
|
+
|
|
444
|
+
|
|
445
|
+
class AsyncDomainAPI(AsyncBaseAPI):
    """Asynchronous access to the ``/domains`` endpoints."""

    async def get_domain(self, domain_id: str) -> DomainModel:
        """Fetch ``/domains/{domain_id}`` and parse it with ``parse_domain``."""
        payload = await self._get(f"/domains/{domain_id}")
        return parse_domain(payload)

    async def find_domains(
        self,
        community_id: Optional[str] = None,
        name: Optional[str] = None,
        limit: int = 100,
        offset: int = 0
    ) -> DomainList:
        """
        Query ``/domains`` and parse the page with ``parse_domains``.

        ``community_id`` and ``name`` are only added to the query when truthy;
        ``limit`` and ``offset`` are always sent.
        """
        query: Dict[str, Any] = {"limit": limit, "offset": offset}
        optional_filters = (("communityId", community_id), ("name", name))
        query.update({key: value for key, value in optional_filters if value})

        payload = await self._get("/domains", params=query)
        return parse_domains(payload)
|
|
469
|
+
|
|
470
|
+
|
|
471
|
+
class AsyncCommunityAPI(AsyncBaseAPI):
    """Asynchronous access to the ``/communities`` endpoints."""

    async def get_community(self, community_id: str) -> CommunityModel:
        """Fetch ``/communities/{community_id}`` and parse it with ``parse_community``."""
        payload = await self._get(f"/communities/{community_id}")
        return parse_community(payload)

    async def find_communities(
        self,
        name: Optional[str] = None,
        parent_id: Optional[str] = None,
        limit: int = 100,
        offset: int = 0
    ) -> CommunityList:
        """
        Query ``/communities`` and parse the page with ``parse_communities``.

        ``name`` and ``parent_id`` are only added to the query when truthy;
        ``limit`` and ``offset`` are always sent.
        """
        query: Dict[str, Any] = {"limit": limit, "offset": offset}
        optional_filters = (("name", name), ("parentId", parent_id))
        query.update({key: value for key, value in optional_filters if value})

        payload = await self._get("/communities", params=query)
        return parse_communities(payload)
|
|
495
|
+
|
|
496
|
+
|
|
497
|
+
class AsyncRelationAPI(AsyncBaseAPI):
    """Async Relation API."""

    async def get_relation(self, relation_id: str) -> RelationModel:
        """Get a relation by ID, parsed by ``parse_relation``."""
        data = await self._get(f"/relations/{relation_id}")
        return parse_relation(data)

    async def find_relations(
        self,
        source_id: Optional[str] = None,
        target_id: Optional[str] = None,
        type_id: Optional[str] = None,
        limit: int = 100,
        offset: int = 0
    ) -> RelationList:
        """
        Find relations with filters.

        ``source_id``, ``target_id`` and ``type_id`` are only added to the
        query when truthy; ``limit`` and ``offset`` are always sent.

        Returns:
            The ``/relations`` response parsed by ``parse_relations``.
        """
        params: Dict[str, Any] = {"limit": limit, "offset": offset}
        if source_id:
            params["sourceId"] = source_id
        if target_id:
            params["targetId"] = target_id
        if type_id:
            params["typeId"] = type_id

        data = await self._get("/relations", params=params)
        return parse_relations(data)

    async def add_relation(
        self,
        source_id: str,
        target_id: str,
        type_id: str
    ) -> RelationModel:
        """
        Create a new relation.

        Returns:
            The POST ``/relations`` response parsed by ``parse_relation``.
        """
        data = {
            "sourceId": source_id,
            "targetId": target_id,
            "typeId": type_id
        }
        result = await self._post("/relations", data)
        return parse_relation(result)

    async def add_relations_batch(
        self,
        relations: List[Dict[str, str]],
        max_concurrent: int = 30
    ) -> List[RelationModel]:
        """
        Create multiple relations concurrently.

        This is best-effort: a relation whose creation fails is logged at
        WARNING level and left out of the result.

        Args:
            relations: Dicts passed to ``add_relation`` as keyword arguments
                (``source_id``, ``target_id``, ``type_id``).
            max_concurrent: Maximum concurrent requests (must be >= 1).

        Returns:
            The successfully created relations, in input order.

        Raises:
            ValueError: If ``max_concurrent`` is less than 1.
        """
        # A semaphore created with 0 permits would block every task forever.
        if max_concurrent < 1:
            raise ValueError("max_concurrent must be at least 1")

        semaphore = asyncio.Semaphore(max_concurrent)
        log = logging.getLogger(__name__)

        async def add_one(rel: Dict[str, str]) -> Optional[RelationModel]:
            async with semaphore:
                try:
                    return await self.add_relation(**rel)
                except Exception as exc:
                    # Keep the batch going, but leave a trace of what was dropped.
                    log.warning(
                        "Failed to add relation %r: %s: %s",
                        rel, type(exc).__name__, exc
                    )
                    return None

        results = await asyncio.gather(*(add_one(r) for r in relations))
        return [r for r in results if r is not None]

    async def get_asset_relations(
        self,
        asset_id: str,
        direction: str = "BOTH",
        limit: int = 500
    ) -> Dict[str, Any]:
        """
        Get relations for an asset, grouped by direction and type name.

        One page of up to ``limit`` relations is requested per direction.
        A direction whose request fails is logged at WARNING level and left
        empty in the result.

        Args:
            asset_id: The asset to look up.
            direction: "BOTH", "OUTGOING" or "INCOMING". Any other value
                selects no direction and yields the empty structure.
            limit: Page size used for each direction.

        Returns:
            Dict with keys ``outgoing`` and ``incoming`` (type name -> list
            of ``{"id", "target_id"/"source_id", "target_name"/"source_name"}``
            dicts) plus ``outgoing_count`` and ``incoming_count`` taken from
            each page's ``total``.
        """
        result: Dict[str, Any] = {
            "outgoing": {},
            "incoming": {},
            "outgoing_count": 0,
            "incoming_count": 0
        }

        pending = []
        if direction in ("BOTH", "OUTGOING"):
            pending.append(("outgoing", self.find_relations(source_id=asset_id, limit=limit)))
        if direction in ("BOTH", "INCOMING"):
            pending.append(("incoming", self.find_relations(target_id=asset_id, limit=limit)))

        outcomes = await asyncio.gather(
            *(coro for _, coro in pending), return_exceptions=True
        )
        log = logging.getLogger(__name__)

        for (direction_key, _), rel_result in zip(pending, outcomes):
            if isinstance(rel_result, Exception):
                log.warning(
                    "Could not load %s relations for asset %s: %s: %s",
                    direction_key, asset_id, type(rel_result).__name__, rel_result
                )
                continue

            result[f"{direction_key}_count"] = rel_result.total
            grouped = result[direction_key]

            for rel in rel_result.results:
                # Outgoing entries describe the far end (target); incoming
                # entries describe where the relation comes from (source).
                if direction_key == "outgoing":
                    entry = {
                        "id": rel.id,
                        "target_id": rel.target.id,
                        "target_name": rel.target.name
                    }
                else:
                    entry = {
                        "id": rel.id,
                        "source_id": rel.source.id,
                        "source_name": rel.source.name
                    }
                grouped.setdefault(rel.type_name, []).append(entry)

        return result
|
|
605
|
+
|
|
606
|
+
|
|
607
|
+
class AsyncResponsibilityAPI(AsyncBaseAPI):
    """Async Responsibility API."""

    async def get_asset_responsibilities(
        self,
        asset_id: str,
        limit: int = 50
    ) -> List[Dict[str, Any]]:
        """
        Get responsibilities for an asset.

        Args:
            asset_id: Sent as the ``resourceIds`` query parameter.
            limit: Max results requested.

        Returns:
            One dict per entry in the response's ``results`` list, with keys
            ``role`` (role name, "Unknown" if missing), ``owner`` (first and
            last name joined, else the owner's ``name``, else "Unknown") and
            ``owner_id``.
        """
        params = {"resourceIds": asset_id, "limit": limit}
        data = await self._get("/responsibilities", params=params)

        responsibilities = []
        for resp in data.get("results", []):
            # `or {}` also covers keys that are present but null, which
            # `.get(key, {})` alone would pass through as None.
            role = (resp.get("role") or {}).get("name", "Unknown")
            owner = resp.get("owner") or {}

            # Null name parts must not be rendered as the text "None".
            first_name = owner.get("firstName") or ""
            last_name = owner.get("lastName") or ""
            owner_name = f"{first_name} {last_name}".strip()
            if not owner_name:
                owner_name = owner.get("name", "Unknown")

            responsibilities.append({
                "role": role,
                "owner": owner_name,
                "owner_id": owner.get("id")
            })

        return responsibilities
|
|
634
|
+
|
|
635
|
+
|
|
636
|
+
class AsyncSearchAPI(AsyncBaseAPI):
    """Asynchronous access to the ``/search`` endpoint."""

    async def find(
        self,
        query: str,
        limit: int = 10,
        offset: int = 0,
        category: Optional[str] = None,
        type_ids: Optional[List[str]] = None,
        domain_ids: Optional[List[str]] = None,
        community_ids: Optional[List[str]] = None
    ) -> SearchResults:
        """
        Run a search and return the parsed results.

        Args:
            query: Search keywords, sent as ``keywords``.
            limit: Max results.
            offset: Pagination offset.
            category: Optional category filter (ASSET, DOMAIN, etc.).
            type_ids: Optional type ID filter.
            domain_ids: Optional domain ID filter.
            community_ids: Optional community ID filter.

        Returns:
            The POST ``/search`` response parsed by ``parse_search_results``.
        """
        body: Dict[str, Any] = {
            "keywords": query,
            "limit": limit,
            "offset": offset
        }

        # Optional filters are only sent when they carry a truthy value.
        optional_filters = (
            ("category", category),
            ("typeIds", type_ids),
            ("domainIds", domain_ids),
            ("communityIds", community_ids),
        )
        for key, value in optional_filters:
            if value:
                body[key] = value

        response = await self._post("/search", body)
        return parse_search_results(response)

    async def find_assets(
        self,
        query: str,
        limit: int = 10,
        offset: int = 0,
        type_ids: Optional[List[str]] = None,
        domain_ids: Optional[List[str]] = None
    ) -> SearchResults:
        """Run ``find`` with the category fixed to "ASSET"."""
        found = await self.find(
            query=query,
            limit=limit,
            offset=offset,
            category="ASSET",
            type_ids=type_ids,
            domain_ids=domain_ids
        )
        return found
|
|
699
|
+
|
|
700
|
+
|
|
701
|
+
class AsyncCollibraConnector:
    """
    Asynchronous Collibra Connector using httpx.

    Must be used as an async context manager: the HTTP client is created on
    entry and closed on exit.

    Example:
        >>> async with AsyncCollibraConnector(
        ...     api="https://your-instance.collibra.com",
        ...     username="user",
        ...     password="pass"
        ... ) as conn:
        ...     # Fetch many assets concurrently
        ...     assets = await conn.asset.get_assets_batch(asset_ids)
        ...
        ...     # Create many relations concurrently
        ...     relations = await conn.relation.add_relations_batch(relation_data)
    """

    DEFAULT_TIMEOUT: float = 30.0
    DEFAULT_MAX_RETRIES: int = 3
    DEFAULT_RETRY_DELAY: float = 1.0
    RETRYABLE_STATUS_CODES: tuple = (429, 500, 502, 503, 504)

    def __init__(
        self,
        api: Optional[str] = None,
        username: Optional[str] = None,
        password: Optional[str] = None,
        timeout: float = DEFAULT_TIMEOUT,
        max_retries: int = DEFAULT_MAX_RETRIES,
        retry_delay: float = DEFAULT_RETRY_DELAY,
        max_connections: int = 100
    ) -> None:
        """
        Initialize the async connector.

        Args:
            api: Base URL for Collibra (or COLLIBRA_URL env var).
            username: Username (or COLLIBRA_USERNAME env var).
            password: Password (or COLLIBRA_PASSWORD env var).
            timeout: Request timeout in seconds.
            max_retries: Total number of attempts per request; values below
                1 are treated as 1.
            retry_delay: Base delay between retries, doubled each attempt.
            max_connections: Maximum concurrent connections.

        Raises:
            ImportError: If httpx is not installed.
            ValueError: If the URL, username or password is missing.
        """
        if not HTTPX_AVAILABLE:
            raise ImportError(
                "httpx is required for async operations. "
                "Install it with: pip install httpx"
            )

        # Load from env vars if not provided
        api = api or os.environ.get("COLLIBRA_URL")
        username = username or os.environ.get("COLLIBRA_USERNAME")
        password = password or os.environ.get("COLLIBRA_PASSWORD")

        if not api:
            raise ValueError("API URL required (arg or COLLIBRA_URL env var)")
        if not username:
            raise ValueError("Username required (arg or COLLIBRA_USERNAME env var)")
        if not password:
            raise ValueError("Password required (arg or COLLIBRA_PASSWORD env var)")

        self._base_url = api.rstrip("/")
        self._api = self._base_url + "/rest/2.0"
        self._auth = (username, password)
        self._timeout = timeout
        self._max_retries = max_retries
        self._retry_delay = retry_delay
        self._max_connections = max_connections

        # Created in __aenter__, closed in __aexit__.
        self._client: Optional[httpx.AsyncClient] = None
        self.logger = logging.getLogger(__name__)

        # Initialize API modules
        self.asset = AsyncAssetAPI(self)
        self.attribute = AsyncAttributeAPI(self)
        self.domain = AsyncDomainAPI(self)
        self.community = AsyncCommunityAPI(self)
        self.relation = AsyncRelationAPI(self)
        self.responsibility = AsyncResponsibilityAPI(self)
        self.search = AsyncSearchAPI(self)

    @property
    def api(self) -> str:
        """Get the full API URL (base URL plus ``/rest/2.0``)."""
        return self._api

    async def __aenter__(self) -> "AsyncCollibraConnector":
        """Enter async context manager and create the HTTP client."""
        limits = httpx.Limits(
            max_connections=self._max_connections,
            max_keepalive_connections=self._max_connections // 2
        )
        self._client = httpx.AsyncClient(
            auth=self._auth,
            timeout=self._timeout,
            limits=limits,
            headers={
                "Content-Type": "application/json",
                "Accept": "application/json"
            }
        )
        return self

    async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """Exit async context manager and close the HTTP client."""
        if self._client:
            await self._client.aclose()
            self._client = None

    async def _backoff(self, attempt: int, reason: str) -> None:
        """Log ``reason`` and sleep ``retry_delay * 2**attempt`` seconds."""
        delay = self._retry_delay * (2 ** attempt)
        self.logger.warning("%s, retrying in %.1fs", reason, delay)
        await asyncio.sleep(delay)

    async def _request(
        self,
        method: str,
        endpoint: str,
        **kwargs: Any
    ) -> Dict[str, Any]:
        """
        Make an HTTP request with automatic retry logic.

        Connection errors, timeouts, 429 and 5xx responses are retried with
        exponential backoff until the attempts are used up. Any 2xx response
        is treated as success.

        Args:
            method: HTTP method (GET, POST, PUT, PATCH, DELETE).
            endpoint: API endpoint (e.g., "/assets/{id}").
            **kwargs: Additional httpx request arguments.

        Returns:
            The decoded JSON body, or ``{}`` when the response is 204 or has
            an empty body.

        Raises:
            RuntimeError: If used outside the ``async with`` block.
            UnauthorizedError: On 401.
            ForbiddenError: On 403.
            NotFoundError: On 404.
            ServerError: On 5xx after the last attempt.
            Exception: On 429 after the last attempt, or any other status.
            httpx.ConnectError, httpx.TimeoutException: After the last attempt.
        """
        if self._client is None:
            raise RuntimeError(
                "Client not initialized. Use 'async with' context manager."
            )

        url = f"{self._api}{endpoint}"
        # Always send at least one request, even when max_retries was
        # configured as 0 or a negative number.
        attempts = max(1, self._max_retries)

        for attempt in range(attempts):
            is_last = attempt == attempts - 1

            try:
                response = await self._client.request(method, url, **kwargs)
            except (httpx.ConnectError, httpx.TimeoutException) as exc:
                if is_last:
                    raise
                await self._backoff(attempt, type(exc).__name__)
                continue

            status = response.status_code

            if 200 <= status < 300:
                if status == 204 or not response.text.strip():
                    return {}
                return response.json()
            if status == 401:
                raise UnauthorizedError(f"Unauthorized: {response.text}")
            if status == 403:
                raise ForbiddenError(f"Forbidden: {response.text}")
            if status == 404:
                raise NotFoundError(f"Not found: {response.text}")
            if status == 429:
                if is_last:
                    raise Exception(f"Rate limited: {response.text}")
                await self._backoff(attempt, "Rate limited")
                continue
            if status >= 500:
                if is_last:
                    raise ServerError(f"Server error: {response.text}")
                await self._backoff(attempt, f"Server error {status}")
                continue

            raise Exception(
                f"Unexpected status {status}: {response.text}"
            )

        # Defensive only: the final attempt above always returns or raises.
        raise Exception("Request failed after all retries")

    async def test_connection(self) -> bool:
        """Test the connection to Collibra; log and return False on any error."""
        try:
            await self._request("GET", "/auth/sessions/current")
            return True
        except Exception as e:
            self.logger.error(f"Connection test failed: {e}")
            return False

    async def gather_with_concurrency(
        self,
        coros: List[Any],
        max_concurrent: int = 50
    ) -> List[Any]:
        """
        Execute coroutines with limited concurrency.

        Args:
            coros: List of coroutines to execute.
            max_concurrent: Maximum concurrent executions (must be >= 1).

        Returns:
            List of results, in the same order as ``coros``.

        Raises:
            ValueError: If ``max_concurrent`` is less than 1.
        """
        # A semaphore created with 0 permits would block every task forever.
        if max_concurrent < 1:
            raise ValueError("max_concurrent must be at least 1")

        semaphore = asyncio.Semaphore(max_concurrent)

        async def limited(coro: Any) -> Any:
            async with semaphore:
                return await coro

        return await asyncio.gather(*(limited(c) for c in coros))
|