collibra-connector 1.0.18__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- collibra_connector/__init__.py +284 -4
- collibra_connector/api/Asset.py +301 -3
- collibra_connector/api/Attribute.py +204 -0
- collibra_connector/api/Base.py +2 -2
- collibra_connector/api/Community.py +1 -1
- collibra_connector/api/Relation.py +216 -0
- collibra_connector/api/Responsibility.py +5 -5
- collibra_connector/api/Search.py +102 -0
- collibra_connector/api/__init__.py +23 -13
- collibra_connector/async_connector.py +930 -0
- collibra_connector/cli.py +597 -0
- collibra_connector/connector.py +270 -48
- collibra_connector/helpers.py +845 -0
- collibra_connector/lineage.py +716 -0
- collibra_connector/models.py +897 -0
- collibra_connector/py.typed +0 -0
- collibra_connector/telemetry.py +576 -0
- collibra_connector/testing.py +806 -0
- collibra_connector-1.1.0.dist-info/METADATA +540 -0
- collibra_connector-1.1.0.dist-info/RECORD +32 -0
- collibra_connector-1.1.0.dist-info/entry_points.txt +2 -0
- collibra_connector-1.0.18.dist-info/METADATA +0 -157
- collibra_connector-1.0.18.dist-info/RECORD +0 -21
- {collibra_connector-1.0.18.dist-info → collibra_connector-1.1.0.dist-info}/WHEEL +0 -0
- {collibra_connector-1.0.18.dist-info → collibra_connector-1.1.0.dist-info}/licenses/LICENSE +0 -0
- {collibra_connector-1.0.18.dist-info → collibra_connector-1.1.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,716 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Lineage Builder - Declarative data lineage creation.
|
|
3
|
+
|
|
4
|
+
This module provides a fluent API for building and committing
|
|
5
|
+
technical data lineage in Collibra, including assets and relations.
|
|
6
|
+
|
|
7
|
+
Example:
|
|
8
|
+
>>> from collibra_connector import CollibraConnector
|
|
9
|
+
>>> from collibra_connector.lineage import LineageBuilder, LineageNode
|
|
10
|
+
>>>
|
|
11
|
+
>>> conn = CollibraConnector(...)
|
|
12
|
+
>>> builder = LineageBuilder(conn)
|
|
13
|
+
>>>
|
|
14
|
+
>>> # Define nodes
|
|
15
|
+
>>> s3_table = LineageNode("s3://bucket/raw/customers", "Table")
|
|
16
|
+
>>> glue_job = LineageNode("etl_transform_customers", "Data Pipeline")
|
|
17
|
+
>>> redshift_table = LineageNode("warehouse.customers", "Table")
|
|
18
|
+
>>>
|
|
19
|
+
>>> # Build lineage
|
|
20
|
+
>>> builder.add_edge(s3_table, glue_job, "is source for")
|
|
21
|
+
>>> builder.add_edge(glue_job, redshift_table, "is target for")
|
|
22
|
+
>>>
|
|
23
|
+
>>> # Commit to Collibra
|
|
24
|
+
>>> result = builder.commit(domain_id="lineage-domain-uuid")
|
|
25
|
+
>>> print(f"Created {result.assets_created} assets, {result.relations_created} relations")
|
|
26
|
+
"""
|
|
27
|
+
from __future__ import annotations

import logging
from dataclasses import dataclass, field
from enum import Enum
from typing import Any, Dict, List, Optional, Set, Tuple, Union, TYPE_CHECKING
from uuid import uuid4

if TYPE_CHECKING:
    from .connector import CollibraConnector
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class LineageDirection(str, Enum):
    """Which way data moves relative to a node.

    Members are ``str`` subclasses, so they compare equal to their plain
    string values (``LineageDirection.UPSTREAM == "upstream"``).
    """

    # Data arrives at this node from elsewhere.
    UPSTREAM = "upstream"
    # Data leaves this node towards other nodes.
    DOWNSTREAM = "downstream"
    # Both of the above.
    BIDIRECTIONAL = "bidirectional"
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class LineageRelationType(str, Enum):
    """Role names of frequently used lineage relations.

    Each value is the role string passed as ``relation_type`` when adding
    an edge. Members are ``str`` subclasses and therefore interchangeable
    with the literal role names.
    """

    SOURCE_FOR = "is source for"
    TARGET_FOR = "is target for"
    TRANSFORMS = "transforms"
    DERIVED_FROM = "is derived from"
    CONTAINS = "contains"
    PART_OF = "is part of"
    USES = "uses"
    PRODUCES = "produces"
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
@dataclass
class LineageNode:
    """
    Represents a node in the lineage graph.

    A node can represent any data asset: a table, a file, an ETL job,
    a dashboard, etc. Nodes can either reference existing Collibra assets
    by ID or define new assets to be created.

    Identity is based solely on ``_internal_id`` (a random UUID string
    generated per instance): two nodes with identical field values are
    still different nodes, and a node stays hashable even though the
    dataclass is mutable.

    Attributes:
        name: The name of the node/asset.
        asset_type: The asset type name (e.g., "Table", "Data Pipeline").
        asset_id: Optional existing asset ID (if referencing existing asset).
        display_name: Optional display name.
        description: Optional description.
        attributes: Optional dict of attribute name -> value.
        metadata: Optional dict of additional metadata.

    Example:
        >>> # Reference existing asset
        >>> existing = LineageNode.from_id("existing-asset-uuid")
        >>>
        >>> # Define new asset to create
        >>> new_table = LineageNode(
        ...     name="raw.customers",
        ...     asset_type="Table",
        ...     description="Raw customer data from CRM",
        ...     attributes={"Data Source": "Salesforce"}
        ... )
    """

    name: str
    asset_type: str = "Data Asset"
    asset_id: Optional[str] = None
    display_name: Optional[str] = None
    description: Optional[str] = None
    attributes: Dict[str, Any] = field(default_factory=dict)
    metadata: Dict[str, Any] = field(default_factory=dict)
    # Graph-local identity; used by __hash__ / __eq__ and as the builder's node key.
    _internal_id: str = field(default_factory=lambda: str(uuid4()))

    @classmethod
    def from_id(cls, asset_id: str, name: str = "") -> "LineageNode":
        """Create a node referencing an existing Collibra asset.

        Args:
            asset_id: ID of the existing asset.
            name: Optional label for the node; defaults to an empty string.

        Returns:
            A node with ``asset_id`` set and the default ``asset_type``.
        """
        return cls(name=name, asset_id=asset_id)

    @classmethod
    def table(
        cls,
        name: str,
        schema: Optional[str] = None,
        database: Optional[str] = None,
        **kwargs: Any
    ) -> "LineageNode":
        """Create a Table node.

        The node name is built as ``[database.][schema.]name``; each prefix
        is only added when the corresponding argument is truthy.

        Args:
            name: Table name.
            schema: Optional schema prefix.
            database: Optional database prefix.
            **kwargs: Forwarded to the ``LineageNode`` constructor.
        """
        full_name = name
        if schema:
            full_name = f"{schema}.{name}"
        if database:
            full_name = f"{database}.{full_name}"
        return cls(name=full_name, asset_type="Table", **kwargs)

    @classmethod
    def column(cls, name: str, table: Optional[str] = None, **kwargs: Any) -> "LineageNode":
        """Create a Column node, named ``table.name`` when ``table`` is given.

        Args:
            name: Column name.
            table: Optional table prefix.
            **kwargs: Forwarded to the ``LineageNode`` constructor.
        """
        full_name = f"{table}.{name}" if table else name
        return cls(name=full_name, asset_type="Column", **kwargs)

    @classmethod
    def pipeline(cls, name: str, **kwargs: Any) -> "LineageNode":
        """Create a Data Pipeline node; ``kwargs`` go to the constructor."""
        return cls(name=name, asset_type="Data Pipeline", **kwargs)

    @classmethod
    def report(cls, name: str, **kwargs: Any) -> "LineageNode":
        """Create a Report node; ``kwargs`` go to the constructor."""
        return cls(name=name, asset_type="Report", **kwargs)

    @classmethod
    def dashboard(cls, name: str, **kwargs: Any) -> "LineageNode":
        """Create a Dashboard node; ``kwargs`` go to the constructor."""
        return cls(name=name, asset_type="Dashboard", **kwargs)

    def __hash__(self) -> int:
        """Hash on the internal ID so nodes can be set members / dict keys."""
        return hash(self._internal_id)

    def __eq__(self, other: object) -> bool:
        """Two nodes are equal when they share the same internal ID."""
        if not isinstance(other, LineageNode):
            # Let Python try the other operand's __eq__; for ordinary
            # objects the final result is still False.
            return NotImplemented
        return self._internal_id == other._internal_id
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
@dataclass
class LineageEdge:
    """
    A directed relation between two lineage nodes.

    The edge points from ``source`` to ``target`` and carries the relation
    role name, plus optionally the relation type UUID to use instead of
    looking the role up.

    Attributes:
        source: Node the edge starts at.
        target: Node the edge ends at.
        relation_type: Role name of the relation.
        relation_type_id: Optional explicit relation type UUID.
        metadata: Optional free-form data attached to the edge.
    """

    source: LineageNode
    target: LineageNode
    relation_type: str = "is source for"
    relation_type_id: Optional[str] = None
    # default_factory gives every edge its own dict.
    metadata: Dict[str, Any] = field(default_factory=dict)
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
@dataclass
class LineageCommitResult:
    """
    Outcome of pushing a lineage graph to Collibra.

    Attributes:
        success: Whether the commit was successful.
        assets_created: Number of new assets created.
        assets_updated: Number of existing assets updated.
        relations_created: Number of relations created.
        assets: Dict mapping node internal IDs to asset IDs.
        relations: List of created relation IDs.
        errors: List of error messages.
    """

    success: bool = True
    assets_created: int = 0
    assets_updated: int = 0
    relations_created: int = 0
    # Mutable containers use default_factory so results never share state.
    assets: Dict[str, str] = field(default_factory=dict)
    relations: List[str] = field(default_factory=list)
    errors: List[str] = field(default_factory=list)
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
class LineageBuilder:
    """
    Fluent builder for creating data lineage in Collibra.

    This class provides a declarative API for defining lineage graphs
    and committing them to Collibra. Nodes and edges are only collected
    in memory until ``commit`` is called; ``commit`` then creates the
    assets and relations and reports failures in its result object
    instead of raising.

    Example:
        >>> builder = LineageBuilder(connector)
        >>>
        >>> # Define the lineage
        >>> source = LineageNode.table("raw.orders", database="s3")
        >>> transform = LineageNode.pipeline("transform_orders")
        >>> target = LineageNode.table("orders", schema="warehouse")
        >>>
        >>> builder.add_edge(source, transform, "is source for")
        >>> builder.add_edge(transform, target, "is source for")
        >>>
        >>> # Or use method chaining
        >>> builder.source(source).through(transform).to(target)
        >>>
        >>> # Commit
        >>> result = builder.commit(domain_id="lineage-domain-uuid")

    Advanced Example with multiple paths:
        >>> # Multiple sources to one target
        >>> builder.add_edges([
        ...     (table_a, etl_job, "is source for"),
        ...     (table_b, etl_job, "is source for"),
        ...     (table_c, etl_job, "is source for"),
        ...     (etl_job, output_table, "is source for"),
        ... ])
    """

    # Receives the failures that are deliberately non-fatal (type lookups,
    # optional attribute writes) so they are no longer silently discarded.
    _log = logging.getLogger(__name__)

    def __init__(
        self,
        connector: "CollibraConnector",
        default_relation_type: str = "is source for"
    ) -> None:
        """
        Initialize the LineageBuilder.

        Args:
            connector: The CollibraConnector instance.
            default_relation_type: Default relation type for edges.
        """
        self.connector = connector
        self.default_relation_type = default_relation_type
        # Nodes keyed by their internal ID; insertion order is commit order.
        self._nodes: Dict[str, LineageNode] = {}
        self._edges: List[LineageEdge] = []
        # Cursor used by the source()/through()/to() fluent calls.
        self._current_source: Optional[LineageNode] = None
        # Name -> ID caches so each type is looked up at most once.
        self._type_id_cache: Dict[str, str] = {}
        self._relation_type_cache: Dict[str, str] = {}

    def add_node(self, node: LineageNode) -> "LineageBuilder":
        """
        Add a node to the lineage graph.

        Adding a node whose internal ID is already present replaces the
        stored entry.

        Args:
            node: The LineageNode to add.

        Returns:
            Self for method chaining.
        """
        self._nodes[node._internal_id] = node
        return self

    def add_edge(
        self,
        source: LineageNode,
        target: LineageNode,
        relation_type: Optional[str] = None,
        relation_type_id: Optional[str] = None
    ) -> "LineageBuilder":
        """
        Add an edge (relation) between two nodes.

        Both endpoints are registered as nodes automatically.

        Args:
            source: The source node.
            target: The target node.
            relation_type: Relation type name (uses default if not specified).
            relation_type_id: Optional specific relation type UUID.

        Returns:
            Self for method chaining.
        """
        self.add_node(source)
        self.add_node(target)

        edge = LineageEdge(
            source=source,
            target=target,
            relation_type=relation_type or self.default_relation_type,
            relation_type_id=relation_type_id
        )
        self._edges.append(edge)
        return self

    def add_edges(
        self,
        edges: List[Tuple[LineageNode, LineageNode, str]]
    ) -> "LineageBuilder":
        """
        Add multiple edges at once.

        Args:
            edges: List of (source, target, relation_type) tuples.

        Returns:
            Self for method chaining.
        """
        for source, target, relation_type in edges:
            self.add_edge(source, target, relation_type)
        return self

    def source(self, node: LineageNode) -> "LineageBuilder":
        """
        Set the current source node for fluent API.

        Args:
            node: The source node.

        Returns:
            Self for method chaining.

        Example:
            >>> builder.source(table_a).to(table_b)
        """
        self.add_node(node)
        self._current_source = node
        return self

    def through(
        self,
        node: LineageNode,
        relation_type: Optional[str] = None
    ) -> "LineageBuilder":
        """
        Add an intermediate node (like an ETL job).

        An edge is created from the current source to ``node``, and
        ``node`` then becomes the current source.

        Args:
            node: The intermediate node.
            relation_type: Relation type from source to this node.

        Returns:
            Self for method chaining.

        Raises:
            ValueError: If ``source()`` has not been called first.

        Example:
            >>> builder.source(raw_table).through(etl_job).to(warehouse_table)
        """
        if self._current_source is None:
            raise ValueError("Must call source() before through()")

        self.add_edge(self._current_source, node, relation_type)
        self._current_source = node
        return self

    def to(
        self,
        node: LineageNode,
        relation_type: Optional[str] = None
    ) -> "LineageBuilder":
        """
        Add the target node and create edge from current source.

        The current source is left unchanged, so several ``to()`` calls in
        a row all start from the same node.

        Args:
            node: The target node.
            relation_type: Relation type to target.

        Returns:
            Self for method chaining.

        Raises:
            ValueError: If ``source()`` has not been called first.

        Example:
            >>> builder.source(table_a).to(table_b)
        """
        if self._current_source is None:
            raise ValueError("Must call source() before to()")

        self.add_edge(self._current_source, node, relation_type)
        return self

    def chain(
        self,
        *nodes: LineageNode,
        relation_type: Optional[str] = None
    ) -> "LineageBuilder":
        """
        Create a chain of nodes with edges between consecutive pairs.

        Fewer than two nodes produce no edges (and register no nodes).

        Args:
            *nodes: Nodes to chain together.
            relation_type: Relation type for all edges.

        Returns:
            Self for method chaining.

        Example:
            >>> builder.chain(source, transform1, transform2, target)
        """
        for upstream, downstream in zip(nodes, nodes[1:]):
            self.add_edge(upstream, downstream, relation_type)
        return self

    def fan_in(
        self,
        sources: List[LineageNode],
        target: LineageNode,
        relation_type: Optional[str] = None
    ) -> "LineageBuilder":
        """
        Multiple sources feeding into one target.

        Args:
            sources: List of source nodes.
            target: The target node.
            relation_type: Relation type for all edges.

        Returns:
            Self for method chaining.

        Example:
            >>> # Multiple tables feeding into one ETL job
            >>> builder.fan_in([table_a, table_b, table_c], etl_job)
        """
        for source in sources:
            self.add_edge(source, target, relation_type)
        return self

    def fan_out(
        self,
        source: LineageNode,
        targets: List[LineageNode],
        relation_type: Optional[str] = None
    ) -> "LineageBuilder":
        """
        One source feeding into multiple targets.

        Args:
            source: The source node.
            targets: List of target nodes.
            relation_type: Relation type for all edges.

        Returns:
            Self for method chaining.

        Example:
            >>> # One ETL job producing multiple tables
            >>> builder.fan_out(etl_job, [table_a, table_b, table_c])
        """
        for target in targets:
            self.add_edge(source, target, relation_type)
        return self

    def _cached_type_lookup(
        self,
        cache: Dict[str, str],
        key: str,
        fetch: Any,
        kind: str
    ) -> Optional[str]:
        """Return the ID of the first result from ``fetch()``, memoised in ``cache``.

        ``fetch`` is a zero-argument callable returning a mapping with a
        ``"results"`` list of mappings. Any exception raised while calling
        or reading it is logged and treated as "not found". Only truthy
        IDs are cached, so a failed lookup is retried on the next call.
        """
        if key in cache:
            return cache[key]

        try:
            matches = fetch().get("results", [])
            type_id = matches[0].get("id") if matches else None
        except Exception as exc:
            self._log.warning("%s lookup failed for %r: %s", kind, key, exc)
            return None

        if type_id:
            cache[key] = type_id
        return type_id

    def _resolve_asset_type_id(self, type_name: str) -> Optional[str]:
        """Get asset type ID by name, with caching; ``None`` if unresolved."""
        return self._cached_type_lookup(
            self._type_id_cache,
            type_name,
            lambda: self.connector.metadata.get_asset_types(name=type_name, limit=1),
            "Asset type",
        )

    def _resolve_relation_type_id(self, role: str) -> Optional[str]:
        """Get relation type ID by role name, with caching; ``None`` if unresolved."""
        return self._cached_type_lookup(
            self._relation_type_cache,
            role,
            lambda: self.connector.metadata.get_relation_types(role=role, limit=1),
            "Relation type",
        )

    def _set_attribute_best_effort(
        self,
        asset_id: Any,
        type_public_id: str,
        value: Any
    ) -> None:
        """Set one attribute on an asset; a failure is logged, never raised."""
        try:
            self.connector.asset.set_asset_attributes(
                asset_id=asset_id,
                type_public_id=type_public_id,
                values=[value]
            )
        except Exception as exc:
            # Attributes are optional: the asset itself was created fine.
            self._log.warning(
                "Could not set attribute %r on asset %s: %s",
                type_public_id, asset_id, exc
            )

    def _commit_assets(
        self,
        result: LineageCommitResult,
        domain_id: str,
        status_id: Optional[str],
        warn_missing_types: bool
    ) -> None:
        """Commit phase 1: map existing assets and create the new ones."""
        for internal_id, node in self._nodes.items():
            try:
                if node.asset_id:
                    # Existing asset - just map the ID
                    result.assets[internal_id] = node.asset_id
                    continue

                type_id = self._resolve_asset_type_id(node.asset_type)
                if not type_id:
                    message = f"Asset type not found: {node.asset_type} for node {node.name}"
                    result.errors.append(message)
                    if warn_missing_types:
                        self._log.warning("%s", message)
                    continue

                asset_data: Dict[str, Any] = {
                    "name": node.name,
                    "domain_id": domain_id,
                    "type_id": type_id,
                }
                if node.display_name:
                    asset_data["display_name"] = node.display_name
                if status_id:
                    asset_data["status_id"] = status_id

                created = self.connector.asset.add_asset(**asset_data)
                asset_id = created.get("id")
                result.assets[internal_id] = asset_id
                result.assets_created += 1

                if node.description:
                    self._set_attribute_best_effort(asset_id, "Description", node.description)
                for attr_name, attr_value in node.attributes.items():
                    self._set_attribute_best_effort(asset_id, attr_name, attr_value)

            except Exception as e:
                result.errors.append(f"Failed to create asset {node.name}: {str(e)}")
                result.success = False

    def _commit_relations(
        self,
        result: LineageCommitResult,
        warn_missing_types: bool
    ) -> None:
        """Commit phase 2: create one relation per edge whose endpoints resolved."""
        for edge in self._edges:
            try:
                source_id = result.assets.get(edge.source._internal_id)
                target_id = result.assets.get(edge.target._internal_id)

                if not source_id:
                    result.errors.append(
                        f"Source asset not found for edge: {edge.source.name}"
                    )
                    continue

                if not target_id:
                    result.errors.append(
                        f"Target asset not found for edge: {edge.target.name}"
                    )
                    continue

                # An explicit ID on the edge wins over the role-name lookup.
                relation_type_id = edge.relation_type_id
                if not relation_type_id:
                    relation_type_id = self._resolve_relation_type_id(edge.relation_type)

                if not relation_type_id:
                    message = f"Relation type not found: {edge.relation_type}"
                    result.errors.append(message)
                    if warn_missing_types:
                        self._log.warning("%s", message)
                    continue

                created = self.connector.relation.add_relation(
                    source_id=source_id,
                    target_id=target_id,
                    type_id=relation_type_id
                )
                result.relations.append(created.get("id"))
                result.relations_created += 1

            except Exception as e:
                result.errors.append(
                    f"Failed to create relation {edge.source.name} -> {edge.target.name}: {str(e)}"
                )
                result.success = False

    def commit(
        self,
        domain_id: str,
        status_id: Optional[str] = None,
        dry_run: bool = False,
        create_missing_types: bool = False
    ) -> LineageCommitResult:
        """
        Commit the lineage graph to Collibra.

        This method:
        1. Creates any new assets defined in nodes (nodes that already
           carry an ``asset_id`` are only mapped, not created)
        2. Sets the description and custom attributes on new assets;
           failures here are logged but do not fail the commit
        3. Creates relations between assets

        Errors are collected in the result rather than raised; the commit
        continues with the remaining nodes and edges after a failure.

        Args:
            domain_id: The domain to create assets in.
            status_id: Optional status for new assets.
            dry_run: If True, nothing is sent to Collibra; the result only
                reports how many assets and relations would be attempted.
            create_missing_types: If True, log a warning for each asset or
                relation type that cannot be resolved. No types are created.

        Returns:
            LineageCommitResult with success status and created IDs.
            ``success`` is False whenever ``errors`` is non-empty.

        Example:
            >>> result = builder.commit(domain_id="lineage-domain")
            >>> if result.success:
            ...     print(f"Created {result.assets_created} assets")
            ...     print(f"Created {result.relations_created} relations")
            >>> else:
            ...     for error in result.errors:
            ...         print(f"Error: {error}")
        """
        result = LineageCommitResult()

        if dry_run:
            result.assets_created = sum(1 for n in self._nodes.values() if not n.asset_id)
            result.relations_created = len(self._edges)
            return result

        self._commit_assets(result, domain_id, status_id, create_missing_types)
        self._commit_relations(result, create_missing_types)

        if result.errors:
            result.success = False

        return result

    def clear(self) -> "LineageBuilder":
        """Clear all nodes and edges and reset the fluent cursor.

        The type-ID caches are kept.
        """
        self._nodes.clear()
        self._edges.clear()
        self._current_source = None
        return self

    def get_nodes(self) -> List[LineageNode]:
        """Get all nodes in the graph (as a new list)."""
        return list(self._nodes.values())

    def get_edges(self) -> List[LineageEdge]:
        """Get all edges in the graph (as a new list)."""
        return list(self._edges)

    def to_dict(self) -> Dict[str, Any]:
        """Export the lineage graph as a dictionary.

        Returns:
            ``{"nodes": [...], "edges": [...]}``. Edges refer to nodes by
            the node's ``"id"`` (its internal ID). ``attributes`` and
            ``metadata`` are shallow copies, so editing the export does not
            modify the graph.
        """
        return {
            "nodes": [
                {
                    "id": n._internal_id,
                    "name": n.name,
                    "asset_type": n.asset_type,
                    "asset_id": n.asset_id,
                    "display_name": n.display_name,
                    "description": n.description,
                    "attributes": dict(n.attributes),
                    "metadata": dict(n.metadata),
                }
                for n in self._nodes.values()
            ],
            "edges": [
                {
                    "source": e.source._internal_id,
                    "target": e.target._internal_id,
                    "relation_type": e.relation_type,
                    "relation_type_id": e.relation_type_id,
                }
                for e in self._edges
            ]
        }

    def from_dict(self, data: Dict[str, Any]) -> "LineageBuilder":
        """Import lineage graph from a dictionary.

        Replaces the current graph. Accepts the structure produced by
        ``to_dict``; only ``name`` (per node) and ``source``/``target``
        (per edge) are required, every other key is optional. Edges that
        refer to a node ID missing from ``data["nodes"]`` are skipped and
        logged.

        Args:
            data: Mapping with optional ``"nodes"`` and ``"edges"`` lists.

        Returns:
            Self for method chaining.
        """
        self.clear()

        node_map: Dict[str, LineageNode] = {}

        for node_data in data.get("nodes", []):
            node = LineageNode(
                name=node_data["name"],
                asset_type=node_data.get("asset_type", "Data Asset"),
                asset_id=node_data.get("asset_id"),
                display_name=node_data.get("display_name"),
                description=node_data.get("description"),
                attributes=dict(node_data.get("attributes") or {}),
                metadata=dict(node_data.get("metadata") or {}),
            )
            # Keep the exported ID so the edge references below resolve.
            node._internal_id = node_data.get("id", node._internal_id)
            node_map[node._internal_id] = node
            self.add_node(node)

        for edge_data in data.get("edges", []):
            source = node_map.get(edge_data["source"])
            target = node_map.get(edge_data["target"])
            if source is None or target is None:
                self._log.warning(
                    "Skipping edge %r -> %r: unknown node reference",
                    edge_data["source"], edge_data["target"]
                )
                continue
            self.add_edge(
                source,
                target,
                edge_data.get("relation_type"),
                edge_data.get("relation_type_id"),
            )

        return self

    @staticmethod
    def _label(node: LineageNode) -> str:
        """Text shown for a node: its name, else its asset ID, else its internal ID."""
        return node.name or node.asset_id or node._internal_id

    def visualize(self) -> str:
        """
        Generate a simple ASCII visualization of the lineage.

        Edges are grouped per source node (by internal ID, so different
        nodes that share a name get separate sections), in the order the
        sources first appear.

        Returns:
            ASCII representation of the lineage graph.
        """
        lines = ["Lineage Graph:", "=" * 40]

        by_source: Dict[str, List[LineageEdge]] = {}
        for edge in self._edges:
            by_source.setdefault(edge.source._internal_id, []).append(edge)

        for edges in by_source.values():
            lines.append(f"\n[{self._label(edges[0].source)}]")
            for edge in edges:
                lines.append(f" --({edge.relation_type})--> [{self._label(edge.target)}]")

        return "\n".join(lines)
|