pvw-cli 1.2.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pvw-cli might be problematic. Click here for more details.
- purviewcli/__init__.py +27 -0
- purviewcli/__main__.py +15 -0
- purviewcli/cli/__init__.py +5 -0
- purviewcli/cli/account.py +199 -0
- purviewcli/cli/cli.py +170 -0
- purviewcli/cli/collections.py +502 -0
- purviewcli/cli/domain.py +361 -0
- purviewcli/cli/entity.py +2436 -0
- purviewcli/cli/glossary.py +533 -0
- purviewcli/cli/health.py +250 -0
- purviewcli/cli/insight.py +113 -0
- purviewcli/cli/lineage.py +1103 -0
- purviewcli/cli/management.py +141 -0
- purviewcli/cli/policystore.py +103 -0
- purviewcli/cli/relationship.py +75 -0
- purviewcli/cli/scan.py +357 -0
- purviewcli/cli/search.py +527 -0
- purviewcli/cli/share.py +478 -0
- purviewcli/cli/types.py +831 -0
- purviewcli/cli/unified_catalog.py +3540 -0
- purviewcli/cli/workflow.py +402 -0
- purviewcli/client/__init__.py +21 -0
- purviewcli/client/_account.py +1877 -0
- purviewcli/client/_collections.py +1761 -0
- purviewcli/client/_domain.py +414 -0
- purviewcli/client/_entity.py +3545 -0
- purviewcli/client/_glossary.py +3233 -0
- purviewcli/client/_health.py +501 -0
- purviewcli/client/_insight.py +2873 -0
- purviewcli/client/_lineage.py +2138 -0
- purviewcli/client/_management.py +2202 -0
- purviewcli/client/_policystore.py +2915 -0
- purviewcli/client/_relationship.py +1351 -0
- purviewcli/client/_scan.py +2607 -0
- purviewcli/client/_search.py +1472 -0
- purviewcli/client/_share.py +272 -0
- purviewcli/client/_types.py +2708 -0
- purviewcli/client/_unified_catalog.py +5112 -0
- purviewcli/client/_workflow.py +2734 -0
- purviewcli/client/api_client.py +1295 -0
- purviewcli/client/business_rules.py +675 -0
- purviewcli/client/config.py +231 -0
- purviewcli/client/data_quality.py +433 -0
- purviewcli/client/endpoint.py +123 -0
- purviewcli/client/endpoints.py +554 -0
- purviewcli/client/exceptions.py +38 -0
- purviewcli/client/lineage_visualization.py +797 -0
- purviewcli/client/monitoring_dashboard.py +712 -0
- purviewcli/client/rate_limiter.py +30 -0
- purviewcli/client/retry_handler.py +125 -0
- purviewcli/client/scanning_operations.py +523 -0
- purviewcli/client/settings.py +1 -0
- purviewcli/client/sync_client.py +250 -0
- purviewcli/plugins/__init__.py +1 -0
- purviewcli/plugins/plugin_system.py +709 -0
- pvw_cli-1.2.8.dist-info/METADATA +1618 -0
- pvw_cli-1.2.8.dist-info/RECORD +60 -0
- pvw_cli-1.2.8.dist-info/WHEEL +5 -0
- pvw_cli-1.2.8.dist-info/entry_points.txt +3 -0
- pvw_cli-1.2.8.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,2138 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Lineage Management Client for Microsoft Purview Data Map API
|
|
3
|
+
Based on official API: https://learn.microsoft.com/en-us/rest/api/purview/datamapdataplane/lineage
|
|
4
|
+
API Version: 2023-09-01 / 2024-03-01-preview
|
|
5
|
+
|
|
6
|
+
Complete implementation of ALL Lineage operations from the official specification with 100% coverage:
|
|
7
|
+
- Lineage CRUD Operations (Create, Read, Update, Delete)
|
|
8
|
+
- Upstream and Downstream Lineage Analysis
|
|
9
|
+
- Lineage Graph Operations
|
|
10
|
+
- Impact Analysis
|
|
11
|
+
- Temporal Lineage
|
|
12
|
+
- Lineage Validation
|
|
13
|
+
- CSV-based Bulk Lineage Creation
|
|
14
|
+
- Lineage Analytics and Reporting
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from .endpoint import Endpoint, decorator, get_json, no_api_call_decorator
|
|
18
|
+
from .endpoints import ENDPOINTS, get_api_version_params
|
|
19
|
+
import json
|
|
20
|
+
import uuid
|
|
21
|
+
from datetime import datetime
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class Lineage(Endpoint):
|
|
25
|
+
"""Lineage Management Operations - Complete Official API Implementation with 100% Coverage"""
|
|
26
|
+
|
|
27
|
+
def __init__(self):
    """Run the base ``Endpoint`` initialiser and tag this client with the "catalog" app."""
    Endpoint.__init__(self)
    # NOTE(review): presumably ``app`` selects which service base URL the
    # Endpoint machinery targets - confirm in ``.endpoint``.
    self.app = "catalog"
|
|
30
|
+
|
|
31
|
+
# === CORE LINEAGE OPERATIONS ===
|
|
32
|
+
|
|
33
|
+
@decorator
def lineageRead(self, args):
    """
    Prepare a GET request for the lineage of an entity addressed by GUID.

    This body only records request state on the instance (``self.method``,
    ``self.endpoint``, ``self.params``) and has no ``return`` statement.
    NOTE(review): sending the request and producing the result is presumably
    done by the ``@decorator`` wrapper - confirm in ``.endpoint``.

    Args:
        args: Mapping of CLI-style option names to values.
            "--guid" (required): substituted into the endpoint template.
            "--direction": falls back to "BOTH" when the key is absent.
            "--depth": falls back to 3 when the key is absent.
            "--width": falls back to 10 when the key is absent.
            "--includeParent": falls back to False; sent as lower-cased text.
            "--getDerivedLineage": falls back to False; sent as lower-cased text.

    Raises:
        KeyError: If "--guid" is not present in ``args`` (plain subscript).
    """
    self.method = "GET"
    url_template = ENDPOINTS["lineage"]["get"]
    self.endpoint = url_template.format(guid=args["--guid"])

    # Start from the API-version parameters, then layer the lineage options
    # on top. The fallbacks only apply when a key is missing entirely; a key
    # that is present with value None is forwarded unchanged.
    query = {**get_api_version_params("datamap")}
    query["direction"] = args.get("--direction", "BOTH")
    query["depth"] = args.get("--depth", 3)
    query["width"] = args.get("--width", 10)
    # Flags are stringified and lower-cased (False -> "false", True -> "true").
    include_parent = args.get("--includeParent", False)
    query["includeParent"] = str(include_parent).lower()
    derived = args.get("--getDerivedLineage", False)
    query["getDerivedLineage"] = str(derived).lower()
    self.params = query
|
|
99
|
+
|
|
100
|
+
@decorator
def lineageReadUniqueAttribute(self, args):
    """
    Prepare a GET request for lineage looked up by type name and qualified name.

    This body only records request state on the instance (``self.method``,
    ``self.endpoint``, ``self.params``) and has no ``return`` statement.
    NOTE(review): sending the request and producing the result is presumably
    done by the ``@decorator`` wrapper - confirm in ``.endpoint``.

    Args:
        args: Mapping of CLI-style option names to values.
            "--typeName" (required): substituted into the endpoint template.
            "--qualifiedName" (required): sent as the "attr:qualifiedName"
                query parameter.
            "--direction": falls back to "BOTH" when the key is absent.
            "--depth": falls back to 3 when the key is absent.
            "--width": falls back to 10 when the key is absent.
            "--includeParent": falls back to False; sent as lower-cased text.
            "--getDerivedLineage": falls back to False; sent as lower-cased text.

    Raises:
        KeyError: If "--typeName" or "--qualifiedName" is not present in
            ``args`` (both are read with a plain subscript).
    """
    self.method = "GET"
    url_template = ENDPOINTS["lineage"]["get_by_unique_attribute"]
    self.endpoint = url_template.format(typeName=args["--typeName"])

    # API-version parameters first, then the unique-attribute selector and
    # the traversal options. Fallbacks apply only when a key is missing.
    query = {**get_api_version_params("datamap")}
    query["attr:qualifiedName"] = args["--qualifiedName"]
    query["direction"] = args.get("--direction", "BOTH")
    query["depth"] = args.get("--depth", 3)
    query["width"] = args.get("--width", 10)
    # Flags are stringified and lower-cased (False -> "false", True -> "true").
    include_parent = args.get("--includeParent", False)
    query["includeParent"] = str(include_parent).lower()
    derived = args.get("--getDerivedLineage", False)
    query["getDerivedLineage"] = str(derived).lower()
    self.params = query
|
|
167
|
+
|
|
168
|
+
@decorator
def lineageReadNextPage(self, args):
    """
    Prepare a GET request for the next page of lineage for a GUID.

    This body only records request state on the instance (``self.method``,
    ``self.endpoint``, ``self.params``) and has no ``return`` statement.
    NOTE(review): sending the request and producing the result is presumably
    done by the ``@decorator`` wrapper - confirm in ``.endpoint``.

    Args:
        args: Mapping of CLI-style option names to values.
            "--guid" (required): substituted into the endpoint template.
            "--direction": falls back to "BOTH" when the key is absent.
            "--getDerivedLineage": falls back to False; sent as lower-cased text.
            "--offset": forwarded as-is; None when the key is absent.
            "--limit": forwarded as-is; None when the key is absent.

    Raises:
        KeyError: If "--guid" is not present in ``args`` (plain subscript).
    """
    self.method = "GET"
    url_template = ENDPOINTS["lineage"]["get_next_page"]
    self.endpoint = url_template.format(guid=args["--guid"])

    query = {**get_api_version_params("datamap")}
    query["direction"] = args.get("--direction", "BOTH")
    # The flag is stringified and lower-cased (False -> "false").
    derived = args.get("--getDerivedLineage", False)
    query["getDerivedLineage"] = str(derived).lower()
    # Paging values have no fallback: a missing key yields None here.
    query["offset"] = args.get("--offset")
    query["limit"] = args.get("--limit")
    self.params = query
|
|
233
|
+
|
|
234
|
+
# === ADVANCED LINEAGE OPERATIONS (NEW FOR 100% COVERAGE) ===
|
|
235
|
+
|
|
236
|
+
@decorator
def lineageReadUpstream(self, args):
    """
    Prepare a GET request against the upstream-lineage endpoint for a GUID.

    This body only records request state on the instance (``self.method``,
    ``self.endpoint``, ``self.params``) and has no ``return`` statement.
    NOTE(review): sending the request and producing the result is presumably
    done by the ``@decorator`` wrapper - confirm in ``.endpoint``.

    Args:
        args: Mapping of CLI-style option names to values.
            "--guid" (required): substituted into the endpoint template.
            "--depth": falls back to 3 when the key is absent.
            "--width": falls back to 10 when the key is absent.
            "--includeParent": falls back to False; sent as lower-cased text.

    Raises:
        KeyError: If "--guid" is not present in ``args`` (plain subscript).
    """
    self.method = "GET"
    url_template = ENDPOINTS["lineage"]["get_upstream_lineage"]
    self.endpoint = url_template.format(guid=args["--guid"])

    # No "direction" parameter is sent; the direction is implied by the
    # endpoint key chosen above.
    query = {**get_api_version_params("datamap")}
    query["depth"] = args.get("--depth", 3)
    query["width"] = args.get("--width", 10)
    include_parent = args.get("--includeParent", False)
    query["includeParent"] = str(include_parent).lower()
    self.params = query
|
|
300
|
+
|
|
301
|
+
@decorator
def lineageReadDownstream(self, args):
    """
    Prepare a GET request against the downstream-lineage endpoint for a GUID.

    This is a read operation: it uses the GET method and sets no payload.
    This body only records request state on the instance (``self.method``,
    ``self.endpoint``, ``self.params``) and has no ``return`` statement.
    NOTE(review): sending the request and producing the result is presumably
    done by the ``@decorator`` wrapper - confirm in ``.endpoint``.

    Args:
        args: Mapping of CLI-style option names to values.
            "--guid" (required): substituted into the endpoint template.
            "--depth": falls back to 3 when the key is absent.
            "--width": falls back to 10 when the key is absent.
            "--includeParent": falls back to False; sent as lower-cased text.

    Raises:
        KeyError: If "--guid" is not present in ``args`` (plain subscript).
    """
    self.method = "GET"
    url_template = ENDPOINTS["lineage"]["get_downstream_lineage"]
    self.endpoint = url_template.format(guid=args["--guid"])

    # No "direction" parameter is sent; the direction is implied by the
    # endpoint key chosen above.
    query = {**get_api_version_params("datamap")}
    query["depth"] = args.get("--depth", 3)
    query["width"] = args.get("--width", 10)
    include_parent = args.get("--includeParent", False)
    query["includeParent"] = str(include_parent).lower()
    self.params = query
|
|
379
|
+
|
|
380
|
+
@decorator
def lineageReadGraph(self, args):
    """
    Prepare a GET request against the lineage-graph endpoint for a GUID.

    This body only records request state on the instance (``self.method``,
    ``self.endpoint``, ``self.params``) and has no ``return`` statement.
    NOTE(review): sending the request and producing the result is presumably
    done by the ``@decorator`` wrapper - confirm in ``.endpoint``.

    Args:
        args: Mapping of CLI-style option names to values.
            "--guid" (required): substituted into the endpoint template.
            "--direction": falls back to "BOTH" when the key is absent.
            "--depth": falls back to 3 when the key is absent.
            "--includeProcesses": falls back to True; sent as lower-cased text.
            "--format": falls back to "json" when the key is absent.

    Raises:
        KeyError: If "--guid" is not present in ``args`` (plain subscript).
    """
    self.method = "GET"
    url_template = ENDPOINTS["lineage"]["get_lineage_graph"]
    self.endpoint = url_template.format(guid=args["--guid"])

    query = {**get_api_version_params("datamap")}
    query["direction"] = args.get("--direction", "BOTH")
    query["depth"] = args.get("--depth", 3)
    # Unlike the other flags in this class, this one defaults to True.
    with_processes = args.get("--includeProcesses", True)
    query["includeProcesses"] = str(with_processes).lower()
    query["format"] = args.get("--format", "json")
    self.params = query
|
|
445
|
+
|
|
446
|
+
@decorator
def lineageCreate(self, args):
    """
    Prepare a POST request to the lineage "create_lineage" endpoint.

    This body only records request state on the instance (``self.method``,
    ``self.endpoint``, ``self.params``, ``self.payload``) and has no
    ``return`` statement.
    NOTE(review): sending the request and producing the result is presumably
    done by the ``@decorator`` wrapper - confirm in ``.endpoint``.

    Args:
        args: Option container handed to ``get_json`` together with the
            "--payloadFile" key.
            NOTE(review): ``get_json`` presumably loads the JSON document
            named by that option - confirm in ``.endpoint``.
    """
    self.method = "POST"
    lineage_routes = ENDPOINTS["lineage"]
    self.endpoint = lineage_routes["create_lineage"]
    # The version parameters are used as returned, with no extra query options.
    self.params = get_api_version_params("datamap")
    request_body = get_json(args, "--payloadFile")
    self.payload = request_body
|
|
520
|
+
|
|
521
|
+
@decorator
def lineageUpdate(self, args):
    """
    Prepare a PUT request to the lineage "update_lineage" endpoint for a GUID.

    This body only records request state on the instance (``self.method``,
    ``self.endpoint``, ``self.params``, ``self.payload``) and has no
    ``return`` statement.
    NOTE(review): sending the request and producing the result is presumably
    done by the ``@decorator`` wrapper - confirm in ``.endpoint``.

    Args:
        args: Mapping of CLI-style option names to values.
            "--guid" (required): substituted into the endpoint template.
            "--payloadFile": key handed to ``get_json`` to obtain the body.
            NOTE(review): ``get_json`` presumably loads the JSON document
            named by that option - confirm in ``.endpoint``.

    Raises:
        KeyError: If "--guid" is not present in ``args`` (plain subscript).
    """
    self.method = "PUT"
    url_template = ENDPOINTS["lineage"]["update_lineage"]
    self.endpoint = url_template.format(guid=args["--guid"])
    # The version parameters are used as returned, with no extra query options.
    self.params = get_api_version_params("datamap")
    request_body = get_json(args, "--payloadFile")
    self.payload = request_body
|
|
592
|
+
|
|
593
|
+
@decorator
def lineageDelete(self, args):
    """
    Prepare a DELETE request to the lineage "delete_lineage" endpoint for a GUID.

    This body only records request state on the instance (``self.method``,
    ``self.endpoint``, ``self.params``) and has no ``return`` statement; no
    payload is set.
    NOTE(review): sending the request and producing the result is presumably
    done by the ``@decorator`` wrapper - confirm in ``.endpoint``.

    Args:
        args: Mapping of CLI-style option names to values.
            "--guid" (required): substituted into the endpoint template.

    Raises:
        KeyError: If "--guid" is not present in ``args`` (plain subscript).
    """
    self.method = "DELETE"
    url_template = ENDPOINTS["lineage"]["delete_lineage"]
    self.endpoint = url_template.format(guid=args["--guid"])
    # The version parameters are used as returned, with no extra query options.
    version_params = get_api_version_params("datamap")
    self.params = version_params
|
|
650
|
+
|
|
651
|
+
@decorator
def lineageValidate(self, args):
    """
    Prepare a POST request to the lineage "validate_lineage" endpoint.

    This body only records request state on the instance (``self.method``,
    ``self.endpoint``, ``self.params``, ``self.payload``) and has no
    ``return`` statement.
    NOTE(review): sending the request and producing the result is presumably
    done by the ``@decorator`` wrapper - confirm in ``.endpoint``.

    Args:
        args: Option container handed to ``get_json`` together with the
            "--payloadFile" key.
            NOTE(review): ``get_json`` presumably loads the JSON document
            named by that option - confirm in ``.endpoint``.
    """
    self.method = "POST"
    lineage_routes = ENDPOINTS["lineage"]
    self.endpoint = lineage_routes["validate_lineage"]
    # The version parameters are used as returned, with no extra query options.
    self.params = get_api_version_params("datamap")
    request_body = get_json(args, "--payloadFile")
    self.payload = request_body
|
|
704
|
+
|
|
705
|
+
@decorator
def lineageReadImpactAnalysis(self, args):
    """
    Prepare a GET request against the impact-analysis endpoint for a GUID.

    This body only records request state on the instance (``self.method``,
    ``self.endpoint``, ``self.params``) and has no ``return`` statement.
    NOTE(review): sending the request and producing the result is presumably
    done by the ``@decorator`` wrapper - confirm in ``.endpoint``.

    Args:
        args: Mapping of CLI-style option names to values.
            "--guid" (required): substituted into the endpoint template.
            "--direction": falls back to "DOWNSTREAM" when the key is absent.
            "--depth": falls back to 5 when the key is absent.
            "--analysisType": falls back to "IMPACT" when the key is absent.
            "--includeProcesses": falls back to True; sent as lower-cased text.

    Raises:
        KeyError: If "--guid" is not present in ``args`` (plain subscript).
    """
    self.method = "GET"
    url_template = ENDPOINTS["lineage"]["get_impact_analysis"]
    self.endpoint = url_template.format(guid=args["--guid"])

    # The fallbacks here differ from the plain lineage reads in this class
    # (DOWNSTREAM / depth 5 rather than BOTH / depth 3).
    query = {**get_api_version_params("datamap")}
    query["direction"] = args.get("--direction", "DOWNSTREAM")
    query["depth"] = args.get("--depth", 5)
    query["analysisType"] = args.get("--analysisType", "IMPACT")
    with_processes = args.get("--includeProcesses", True)
    query["includeProcesses"] = str(with_processes).lower()
    self.params = query
|
|
770
|
+
|
|
771
|
+
@decorator
def lineageReadTemporal(self, args):
    """
    Prepare a GET request for the temporal lineage of one entity.

    This body only configures the request on ``self`` (``method``,
    ``endpoint`` and ``params``) and returns nothing itself. Sending the
    request is presumably done by ``@decorator``, which is defined outside
    this block.

    Args:
        args: Dictionary of CLI-style options.
            "--guid" (required): substituted into the endpoint template.
            "--startTime" / "--endTime": forwarded as-is (None when absent).
            "--timeGranularity": defaults to "HOUR" when the key is absent.
            "--direction": defaults to "BOTH" when the key is absent.
            "--depth": defaults to 3 when the key is absent.

    Raises:
        KeyError: If "--guid" is not present in ``args``.
    """
    self.method = "GET"

    endpoint_template = ENDPOINTS["lineage"]["get_temporal_lineage"]
    self.endpoint = endpoint_template.format(guid=args["--guid"])

    # Start from the API-version parameters, then layer the query options on top.
    query = dict(get_api_version_params("datamap"))
    query["startTime"] = args.get("--startTime")
    query["endTime"] = args.get("--endTime")
    query["timeGranularity"] = args.get("--timeGranularity", "HOUR")
    query["direction"] = args.get("--direction", "BOTH")
    query["depth"] = args.get("--depth", 3)
    self.params = query
|
|
837
|
+
|
|
838
|
+
# === BULK LINEAGE OPERATIONS (FOR CSV SUPPORT) ===
|
|
839
|
+
|
|
840
|
+
@decorator
def lineageCreateBulk(self, args):
    """
    Prepare a POST request that creates lineage from a file.

    Configures ``self.method``, ``self.endpoint``, ``self.params`` and
    ``self.payload``. This body returns nothing itself; the request is
    presumably dispatched by ``@decorator`` (defined outside this block).

    Args:
        args: Dictionary of CLI-style options.
            "--inputFile": path to a ``.csv`` or ``.json`` file. When set,
                it is converted by ``_process_lineage_file``.
            "--payloadFile": used (via ``get_json``) only when
                "--inputFile" is missing or empty.

    Raises:
        ValueError: Propagated from ``_process_lineage_file`` for an
            unsupported input file extension.
    """
    self.method = "POST"
    self.endpoint = ENDPOINTS["lineage"]["create_lineage"]
    self.params = get_api_version_params("datamap")

    # "--inputFile" (CSV or JSON) takes precedence over "--payloadFile".
    source_path = args.get("--inputFile")
    self.payload = (
        self._process_lineage_file(source_path, args)
        if source_path
        else get_json(args, "--payloadFile")
    )
|
|
922
|
+
|
|
923
|
+
def _process_lineage_file(self, input_file, args):
    """
    Load a lineage input file and return it in API payload form.

    Dispatches on the (case-insensitive) file extension:
    ``.csv`` is converted by ``_process_csv_lineage``; ``.json`` is parsed
    and returned unchanged.

    Args:
        input_file: Path to the ``.csv`` or ``.json`` file.
        args: Dictionary of operation arguments, forwarded to the CSV
            converter.

    Returns:
        The payload built from the CSV file, or the parsed JSON document.

    Raises:
        ValueError: If the extension is neither ``.csv`` nor ``.json``.
    """
    import os

    file_ext = os.path.splitext(input_file)[1].lower()

    if file_ext == '.csv':
        return self._process_csv_lineage(input_file, args)

    if file_ext == '.json':
        # JSON is UTF-8 by specification; do not depend on the platform default.
        with open(input_file, 'r', encoding='utf-8') as f:
            return json.load(f)

    raise ValueError(f"Unsupported file format: {file_ext}. Supported formats: .csv, .json")
|
|
937
|
+
|
|
938
|
+
def _process_csv_direct_lineage(self, csv_file, df, args):
    """
    Convert CSV rows into direct (dataset-to-dataset) lineage relationships.

    Each row of ``df`` becomes one relationship dict with a placeholder
    negative GUID ("-1", "-2", ...) derived from the row index.

    Args:
        csv_file: Path of the CSV the frame was read from (not used here).
        df: DataFrame with ``source_entity_guid`` and ``target_entity_guid``
            columns. Optional columns: ``relationship_type``,
            ``source_type``, ``target_type``, ``columnMapping`` /
            ``column_mapping``.
        args: Dictionary of operation arguments (not used here).

    Returns:
        ``{"relationships": [...]}`` with one entry per row.

    Raises:
        KeyError: If a GUID column is missing from ``df``.
    """
    import pandas as pd

    def _cell(row, column, default):
        """Return row[column], or ``default`` if the column is absent or the cell is empty."""
        value = row.get(column, default)
        # Empty CSV cells are parsed as NaN; str(NaN) would otherwise leak "nan" into the payload.
        if value is None or (not isinstance(value, str) and pd.isna(value)):
            return default
        return value

    def _clean_guid(raw):
        """Strip whitespace, a ``guid=`` prefix and surrounding double quotes."""
        return str(raw).strip().replace('guid=', '').strip('"')

    relationships = []

    for idx, row in df.iterrows():
        relationship_type = str(
            _cell(row, 'relationship_type', 'direct_lineage_dataset_dataset')
        ).strip()

        # Prefer the camelCase column, fall back to snake_case, then to "".
        column_mapping = _cell(row, 'columnMapping', None)
        if column_mapping is None:
            column_mapping = _cell(row, 'column_mapping', '')

        relationships.append({
            "typeName": relationship_type,
            "guid": f"-{idx + 1}",  # Negative GUID for auto-generation
            "end1": {
                "guid": _clean_guid(row['source_entity_guid']),
                "typeName": _cell(row, 'source_type', 'DataSet'),
            },
            "end2": {
                "guid": _clean_guid(row['target_entity_guid']),
                "typeName": _cell(row, 'target_type', 'DataSet'),
            },
            "attributes": {
                "columnMapping": str(column_mapping),
            },
        })

    # Return format for relationship creation
    return {
        "relationships": relationships
    }
|
|
983
|
+
|
|
984
|
+
def _process_csv_lineage(self, csv_file, args):
    """
    Read a lineage CSV and convert it to an API payload.

    Two CSV layouts are accepted: GUID-based (``source_entity_guid``,
    ``target_entity_guid``) or qualified-name-based
    (``source_qualified_name``, ``target_qualified_name``). GUID columns
    win when both are present.

    If any ``relationship_type`` cell contains
    ``direct_lineage_dataset_dataset``, the whole file is delegated to
    ``_process_csv_direct_lineage``. Otherwise each row becomes a
    ``Process`` entity whose ``inputs`` / ``outputs`` reference the source
    and target.

    Args:
        csv_file: Path to the CSV file.
        args: Dictionary of operation arguments. "--cluster" (default
            "default", also when present but empty/None) is appended to the
            process qualified name.

    Returns:
        ``{"entities": [...], "referredEntities": {}}`` for Process lineage,
        or the direct-lineage result ``{"relationships": [...]}``.

    Raises:
        ValueError: If neither column pair is present in the CSV.
    """
    import pandas as pd

    def _cell(row, column, default):
        """Return row[column], or ``default`` if the column is absent or the cell is empty."""
        value = row.get(column, default)
        # Empty CSV cells are parsed as NaN; str(NaN) would otherwise leak "nan" into the payload.
        if value is None or (not isinstance(value, str) and pd.isna(value)):
            return default
        return value

    def _clean_guid(raw):
        """Strip whitespace, a ``guid=`` prefix and surrounding double quotes."""
        return str(raw).strip().replace('guid=', '').strip('"')

    # Read CSV file
    df = pd.read_csv(csv_file)

    # Determine which format is being used (GUID-based or qualified name-based)
    has_guid_columns = 'source_entity_guid' in df.columns and 'target_entity_guid' in df.columns
    has_qn_columns = 'source_qualified_name' in df.columns and 'target_qualified_name' in df.columns

    if not has_guid_columns and not has_qn_columns:
        raise ValueError(
            "CSV must contain either (source_entity_guid, target_entity_guid) "
            "or (source_qualified_name, target_qualified_name) columns"
        )

    # If any row uses direct_lineage_dataset_dataset, create relationships
    # instead of Process entities.
    if 'relationship_type' in df.columns:
        # astype(str): an entirely empty column is parsed as float NaN, and the
        # .str accessor raises on non-string data. regex=False: literal match.
        relationship_types = df['relationship_type'].astype(str)
        if relationship_types.str.contains(
            'direct_lineage_dataset_dataset', na=False, regex=False
        ).any():
            return self._process_csv_direct_lineage(csv_file, df, args)

    # "--cluster" may be present with a None value; treat that like absent.
    cluster = args.get('--cluster') or 'default'

    # Generate lineage entities (relationships are defined via inputs/outputs attributes)
    lineage_entities = []

    for idx, row in df.iterrows():
        # Unique negative GUIDs (-1, -2, ...) let Atlas auto-generate the real GUID.
        process_guid = f"-{idx + 1}"
        process_name = _cell(row, 'process_name', None)
        if process_name is None:
            process_name = f"Process_{datetime.now().strftime('%Y%m%d_%H%M%S')}_{idx}"

        source_type = _cell(row, 'source_type', 'DataSet')
        target_type = _cell(row, 'target_type', 'DataSet')

        # Prepare inputs/outputs based on format
        if has_guid_columns:
            inputs = [{"guid": _clean_guid(row['source_entity_guid']), "typeName": source_type}]
            outputs = [{"guid": _clean_guid(row['target_entity_guid']), "typeName": target_type}]
        else:
            inputs = [{"typeName": source_type, "uniqueAttributes": {"qualifiedName": row['source_qualified_name']}}]
            outputs = [{"typeName": target_type, "uniqueAttributes": {"qualifiedName": row['target_qualified_name']}}]

        process_entity = {
            "guid": process_guid,
            "typeName": "Process",
            "attributes": {
                "qualifiedName": f"{process_name}@{cluster}",
                "name": process_name,
                "description": str(_cell(row, 'description', '')),
                "owner": str(_cell(row, 'owner', '')),
                "inputs": inputs,
                "outputs": outputs
            },
            "classifications": [],
            "meanings": []
        }

        # Add custom attributes if present and non-empty
        for attr in ('confidence_score', 'metadata', 'tags'):
            if attr in row and pd.notna(row[attr]) and str(row[attr]).strip():
                if attr == 'tags':
                    # Comma-separated list; drop surrounding whitespace and empty items.
                    process_entity["attributes"][attr] = [
                        tag.strip() for tag in str(row[attr]).split(',') if tag.strip()
                    ]
                elif attr == 'metadata':
                    # Prefer parsed JSON; keep the raw text if it is not valid JSON.
                    try:
                        process_entity["attributes"][attr] = json.loads(str(row[attr]))
                    except json.JSONDecodeError:
                        process_entity["attributes"][attr] = str(row[attr])
                else:
                    process_entity["attributes"][attr] = row[attr]

        lineage_entities.append(process_entity)

    # Relationships are defined via the inputs/outputs attributes of each
    # Process entity, so no separate relationship objects are created.
    return {
        "entities": lineage_entities,
        "referredEntities": {}
    }
|
|
1071
|
+
|
|
1072
|
+
# === CSV LINEAGE OPERATIONS ===
|
|
1073
|
+
|
|
1074
|
+
@decorator
def lineageCSVProcess(self, args):
    """
    Build a bulk lineage request from a CSV file.

    The CSV is converted by ``_process_csv_lineage``. If the result holds
    only "relationships" (direct lineage), the request targets the bulk
    relationship endpoint with the relationship list as payload. Otherwise
    it targets the bulk entity endpoint with the full result as payload.

    Args:
        args: Dictionary of operation arguments. The CSV path is read from
            "csv_file" or, failing that, "--csv-file".

    Returns:
        The converted lineage data, for inspection. The API call itself is
        handled by the decorator.

    Raises:
        ValueError: If no CSV file path is supplied.
    """
    csv_path = args.get("csv_file") or args.get("--csv-file")
    if not csv_path:
        raise ValueError("CSV file path is required")

    # Process CSV and create lineage payload
    lineage_data = self._process_csv_lineage(csv_path, args)

    # Direct lineage yields relationships only; Process lineage yields entities.
    direct_only = "relationships" in lineage_data and "entities" not in lineage_data
    if direct_only:
        group, key, body = "relationship", "bulk_create_relationships", lineage_data["relationships"]
    else:
        group, key, body = "entity", "bulk_create_or_update", lineage_data

    self.method = "POST"
    self.endpoint = ENDPOINTS[group][key]
    self.params = get_api_version_params("datamap")
    self.payload = body

    return lineage_data
|
|
1146
|
+
|
|
1147
|
+
def lineageCSVValidate(self, args):
    """
    Validate a lineage CSV file without sending anything to the API.

    Accepts the same two layouts as ``_process_csv_lineage``: GUID-based
    (``source_entity_guid``, ``target_entity_guid``) or qualified-name-based
    (``source_qualified_name``, ``target_qualified_name``). When the GUID
    columns are present, every GUID is checked against the canonical
    8-4-4-4-12 hexadecimal form after removing an optional ``guid=`` prefix
    and surrounding double quotes. Otherwise, qualified names must be
    non-empty.

    Args:
        args: Dictionary of operation arguments. The CSV path is read from
            "csv_file" or, failing that, "--csv-file".

    Returns:
        On success: ``{"success": True, "rows": int, "columns": list}``.
        On failure: ``{"success": False, "error": str}``, plus "details"
        (list of per-row messages) or "expected_columns" /
        "alternative_columns" where applicable. Errors raised while reading
        or checking the file are reported this way rather than raised.
    """
    import re

    import pandas as pd

    csv_file = args.get("csv_file") or args.get("--csv-file")
    if not csv_file:
        return {"success": False, "error": "CSV file path is required"}

    guid_columns = ['source_entity_guid', 'target_entity_guid']
    qn_columns = ['source_qualified_name', 'target_qualified_name']

    try:
        # Read CSV
        df = pd.read_csv(csv_file)

        has_guid_columns = all(col in df.columns for col in guid_columns)
        has_qn_columns = all(col in df.columns for col in qn_columns)

        if not has_guid_columns and not has_qn_columns:
            missing_columns = [col for col in guid_columns if col not in df.columns]
            return {
                "success": False,
                "error": f"Missing required columns: {', '.join(missing_columns)}",
                "expected_columns": guid_columns,
                "alternative_columns": qn_columns,
            }

        if has_guid_columns:
            # GUID columns take precedence, mirroring the CSV converter.
            guid_pattern = re.compile(
                r'^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$',
                re.IGNORECASE,
            )
            invalid_guids = []
            for idx, row in df.iterrows():
                for label, column in zip(("source", "target"), guid_columns):
                    # Remove guid= prefix and quotes if present
                    guid = str(row[column]).strip().replace('guid=', '').strip('"')
                    if not guid_pattern.match(guid):
                        invalid_guids.append(f"Row {int(idx) + 1}: Invalid {label} GUID '{guid}'")

            if invalid_guids:
                return {
                    "success": False,
                    "error": "Invalid GUID format(s) found",
                    "details": invalid_guids
                }
        else:
            missing_names = []
            for idx, row in df.iterrows():
                for label, column in zip(("source", "target"), qn_columns):
                    value = row[column]
                    if pd.isna(value) or not str(value).strip():
                        missing_names.append(f"Row {int(idx) + 1}: Missing {label} qualified name")

            if missing_names:
                return {
                    "success": False,
                    "error": "Missing qualified name(s) found",
                    "details": missing_names
                }

        return {
            "success": True,
            "rows": len(df),
            "columns": list(df.columns)
        }

    except Exception as e:
        # Deliberate best-effort: report read/parse problems as a result dict.
        return {"success": False, "error": str(e)}
|
|
1249
|
+
|
|
1250
|
+
def lineageCSVSample(self, args):
    """
    Write a sample GUID-based lineage CSV to disk.

    Args:
        args: Dictionary of operation arguments. The destination path is
            read from "--output-file", then "output_file", and falls back
            to "lineage_sample.csv".

    Returns:
        ``{"success": True, "file": path, "message": str}`` when the file
        was written, otherwise ``{"success": False, "error": str}``. Write
        errors are reported in the result rather than raised.
    """
    sample_data = """source_entity_guid,target_entity_guid,relationship_type,process_name,description,confidence_score,owner,metadata
ea3412c3-7387-4bc1-9923-11f6f6f60000,2d21eba5-b08b-4571-b31d-7bf6f6f60000,Process,ETL_Customer_Transform,Transform customer data,0.95,data-engineering,"{""tool"": ""Azure Data Factory""}"
2d21eba5-b08b-4571-b31d-7bf6f6f60000,4fae348b-e960-42f7-834c-38f6f6f60000,Process,Customer_Address_Join,Join customer with address,0.90,data-engineering,"{""tool"": ""Databricks""}"
"""
    # First truthy option wins; otherwise use the default file name.
    output_file = "lineage_sample.csv"
    for option in ("--output-file", "output_file"):
        candidate = args.get(option)
        if candidate:
            output_file = candidate
            break

    try:
        with open(output_file, 'w', encoding='utf-8') as handle:
            handle.write(sample_data)
    except Exception as e:
        return {"success": False, "error": str(e)}

    return {
        "success": True,
        "file": output_file,
        "message": f"Sample CSV file created: {output_file}"
    }
|
|
1315
|
+
|
|
1316
|
+
def lineageCSVTemplates(self, args):
    """
    Describe the CSV column layouts available for lineage import.

    Args:
        args: Dictionary of operation arguments (not used).

    Returns:
        ``{"templates": {...}, "recommended": "detailed"}``, where
        "templates" maps each template name ("basic", "detailed",
        "qualified_names") to its "columns" list and a "description".
    """
    basic_columns = ["source_entity_guid", "target_entity_guid", "relationship_type", "process_name"]
    # The detailed layout is the basic one plus the descriptive columns.
    detailed_columns = basic_columns + ["description", "confidence_score", "owner", "metadata"]
    qualified_name_columns = [
        "source_qualified_name",
        "target_qualified_name",
        "source_type",
        "target_type",
        "process_name",
        "description",
    ]

    templates = {}
    templates["basic"] = {
        "columns": basic_columns,
        "description": "Basic lineage with source, target, and process name",
    }
    templates["detailed"] = {
        "columns": detailed_columns,
        "description": "Detailed lineage with additional metadata",
    }
    templates["qualified_names"] = {
        "columns": qualified_name_columns,
        "description": "Lineage using qualified names instead of GUIDs",
    }

    return {"templates": templates, "recommended": "detailed"}
|
|
1383
|
+
|
|
1384
|
+
# === LINEAGE ANALYTICS AND REPORTING ===
|
|
1385
|
+
|
|
1386
|
+
@decorator
def lineageReadAnalytics(self, args):
    """
    Prepare a GET request for lineage analytics of one entity.

    The endpoint is the entity lineage endpoint with "/analytics" appended.
    This body only configures ``self.method``, ``self.endpoint`` and
    ``self.params`` and returns nothing itself. The request is presumably
    dispatched by ``@decorator`` (defined outside this block).

    Args:
        args: Dictionary of CLI-style options.
            "--guid" (required): substituted into the endpoint template.
            "--startTime" / "--endTime": forwarded as-is (None when absent).
            "--metrics": defaults to "all" when the key is absent.
            "--aggregation": defaults to "daily" when the key is absent.

    Raises:
        KeyError: If "--guid" is not present in ``args``.
    """
    self.method = "GET"

    lineage_url = ENDPOINTS['lineage']['get'].format(guid=args['--guid'])
    self.endpoint = f"{lineage_url}/analytics"

    # Start from the API-version parameters, then layer the query options on top.
    query = dict(get_api_version_params("datamap"))
    query["startTime"] = args.get("--startTime")
    query["endTime"] = args.get("--endTime")
    query["metrics"] = args.get("--metrics", "all")
    query["aggregation"] = args.get("--aggregation", "daily")
    self.params = query
|
|
1451
|
+
|
|
1452
|
+
@decorator
def lineageGenerateReport(self, args):
    """
    Prepare a POST request that generates a lineage report for one entity.

    The endpoint is the entity lineage endpoint with "/report" appended.
    This body only configures ``self.method``, ``self.endpoint``,
    ``self.params`` and ``self.payload`` and returns nothing itself. The
    request is presumably dispatched by ``@decorator`` (defined outside
    this block).

    Args:
        args: Dictionary of CLI-style options.
            "--guid" (required): substituted into the endpoint template.
            "--format": defaults to "json" when the key is absent.
            "--includeDetails": defaults to True when the key is absent;
                sent as its lowercased string form.
            "--payloadFile": when set, loaded via ``get_json`` as the
                request body; otherwise the body is an empty dict.

    Raises:
        KeyError: If "--guid" is not present in ``args``.
    """
    self.method = "POST"

    lineage_url = ENDPOINTS['lineage']['get'].format(guid=args['--guid'])
    self.endpoint = f"{lineage_url}/report"

    query = dict(get_api_version_params("datamap"))
    query["format"] = args.get("--format", "json")
    # Sent as the lowercased string form of the value, e.g. "true".
    query["includeDetails"] = str(args.get("--includeDetails", True)).lower()
    self.params = query

    if args.get("--payloadFile"):
        self.payload = get_json(args, "--payloadFile")
    else:
        self.payload = {}
|
|
1509
|
+
|
|
1510
|
+
# === LINEAGE DISCOVERY AND SEARCH ===
|
|
1511
|
+
|
|
1512
|
+
@decorator
def lineageSearch(self, args):
    """
    Prepare a GET request that searches lineage.

    The endpoint is derived from the entity lineage endpoint template by
    replacing its "/{guid}" segment with "/search". This body only
    configures ``self.method``, ``self.endpoint`` and ``self.params`` and
    returns nothing itself. The request is presumably dispatched by
    ``@decorator`` (defined outside this block).

    Args:
        args: Dictionary of CLI-style options.
            "--query" / "--entityType": forwarded as-is (None when absent).
            "--direction": defaults to "BOTH" when the key is absent.
            "--limit": defaults to 50 when the key is absent.
            "--offset": defaults to 0 when the key is absent.
    """
    self.method = "GET"

    lineage_template = ENDPOINTS['lineage']['get']
    self.endpoint = lineage_template.replace('/{guid}', '/search')

    # Start from the API-version parameters, then layer the search options on top.
    query = dict(get_api_version_params("datamap"))
    query["query"] = args.get("--query")
    query["entityType"] = args.get("--entityType")
    query["direction"] = args.get("--direction", "BOTH")
    query["limit"] = args.get("--limit", 50)
    query["offset"] = args.get("--offset", 0)
    self.params = query
|
|
1576
|
+
|
|
1577
|
+
# === LEGACY COMPATIBILITY METHODS ===
|
|
1578
|
+
|
|
1579
|
+
@decorator
def lineageReadByGuid(self, args):
    """
    Legacy alias for ``lineageRead``.

    Kept so that callers using the older method name keep working. The
    arguments are forwarded untouched and nothing is added to the result.

    Args:
        args: Dictionary of operation arguments. It is handed to
            ``lineageRead`` as-is; see that method for the keys it reads.

    Returns:
        Whatever ``lineageRead(args)`` returns.

    Example:
        client = Lineage()
        result = client.lineageReadByGuid(args=...)
        print(f"Result: {result}")
    """
    # NOTE(review): this wrapper is itself wrapped by @decorator and
    # presumably so is lineageRead -- confirm the decorator is safe to nest.
    forwarded = self.lineageRead(args)
    return forwarded
|
|
1636
|
+
|
|
1637
|
+
@decorator
def lineageReadByUniqueAttribute(self, args):
    """
    Legacy alias for ``lineageReadUniqueAttribute``.

    Kept so that callers using the older method name keep working. The
    arguments are forwarded untouched and nothing is added to the result.

    Args:
        args: Dictionary of operation arguments. It is handed to
            ``lineageReadUniqueAttribute`` as-is; see that method for the
            keys it reads.

    Returns:
        Whatever ``lineageReadUniqueAttribute(args)`` returns.

    Example:
        client = Lineage()
        result = client.lineageReadByUniqueAttribute(args=...)
        print(f"Result: {result}")
    """
    # NOTE(review): this wrapper is itself wrapped by @decorator and
    # presumably so is the target method -- confirm the decorator is safe
    # to nest.
    forwarded = self.lineageReadUniqueAttribute(args)
    return forwarded
|
|
1694
|
+
|
|
1695
|
+
@decorator
def lineageReadNext(self, args):
    """
    Legacy alias for ``lineageReadNextPage``.

    Kept so that callers using the older method name keep working. The
    arguments are forwarded untouched and nothing is added to the result.

    Args:
        args: Dictionary of operation arguments. It is handed to
            ``lineageReadNextPage`` as-is; see that method for the keys it
            reads.

    Returns:
        Whatever ``lineageReadNextPage(args)`` returns.

    Example:
        client = Lineage()
        result = client.lineageReadNext(args=...)
        print(f"Result: {result}")
    """
    # NOTE(review): this wrapper is itself wrapped by @decorator and
    # presumably so is lineageReadNextPage -- confirm the decorator is safe
    # to nest.
    forwarded = self.lineageReadNextPage(args)
    return forwarded
|
|
1752
|
+
|
|
1753
|
+
def lineageCreateColumnLevel(self, args):
    """
    Create column-level lineage from one source column to N target columns.

    For every (target table, target column) pair a ``Process`` entity is
    built whose input is the source column and whose output is the target
    column, together with one ``dataset_process_inputs`` and one
    ``process_dataset_outputs`` relationship. Everything is then sent in a
    single POST to the bulk create-or-update entity endpoint.

    Columns are looked up by comparing the requested name, case-insensitively,
    with the ``displayText`` of the entries under
    ``entity.relationshipAttributes.columns`` of each table.

    Args:
        args: Dictionary containing:
            --source-table-guid: GUID of the source table (required)
            --target-table-guids: List of target table GUIDs; a single GUID
                string is accepted and treated as a one-element list
            --target-table-guid: Legacy single-target key, used only when
                --target-table-guids is missing or empty
            --source-column: Name of the source column (required)
            --target-columns: List of target column names, paired by
                position with the target tables; a single name string is
                accepted and treated as a one-element list
            --target-column: Legacy single-column key, used only when
                --target-columns is missing or empty
            --process-name: Optional name for the process (default:
                "<source>_to_<target>_Mapping")
            --description: Optional description (default:
                "Column lineage: <source> -> <target>")
            --owner: Optional owner (default: data-engineering)
            --validate-types: When truthy, targets whose column type is not
                compatible with the source column type are rejected

    Returns:
        Dictionary with:
            status: "success" or "error"
            message: Human-readable summary
            results: One entry per target (target_index, target_table_guid,
                target_column, status and, on failure, message). Absent
                only when the source table or source column lookup fails.
            created_count, api_response: Present once the bulk request has
                been sent.

    Raises:
        ValueError: When a required parameter is missing, or the number of
            target tables differs from the number of target columns.

    Example:
        # Single target
        client = Lineage()
        args = {
            "--source-table-guid": "abc-123",
            "--target-table-guids": ["def-456"],
            "--source-column": "CityKey",
            "--target-columns": ["CityKey"],
        }

        # Multiple targets
        args = {
            "--source-table-guid": "abc-123",
            "--target-table-guids": ["def-456", "ghi-789"],
            "--source-column": "CityKey",
            "--target-columns": ["CityKey", "City_ID"],
        }
        result = client.lineageCreateColumnLevel(args)

    Use Cases:
        - ETL Documentation: Document column transformations
        - Data Lineage: Track data flow at column level
        - Impact Analysis: Understand column dependencies
        - Multi-target mapping: One source feeding multiple targets
    """

    def _as_list(value):
        # A bare string must count as ONE item; iterating it would pair
        # individual characters of a GUID with characters of a column name.
        if isinstance(value, str):
            return [value]
        return list(value) if value else []

    # ---- Collect and validate arguments before touching the network ----
    source_table_guid = args.get("--source-table-guid")
    source_column_name = args.get("--source-column")

    target_table_guids = _as_list(args.get("--target-table-guids"))
    if not target_table_guids:
        # Backward compatibility: single target
        single_target = args.get("--target-table-guid")
        target_table_guids = [single_target] if single_target else []

    target_columns = _as_list(args.get("--target-columns"))
    if not target_columns:
        # Backward compatibility: single column
        single_column = args.get("--target-column")
        target_columns = [single_column] if single_column else []

    if not source_table_guid:
        raise ValueError("Missing required parameter: --source-table-guid")
    if not source_column_name:
        raise ValueError("Missing required parameter: --source-column")
    if not target_table_guids:
        raise ValueError("Missing required parameter: --target-table-guids (or --target-table-guid)")
    if not target_columns:
        raise ValueError("Missing required parameter: --target-columns (or --target-column)")
    if len(target_table_guids) != len(target_columns):
        raise ValueError(f"Mismatch: {len(target_table_guids)} target tables but {len(target_columns)} target columns")

    process_name = args.get("--process-name")
    description = args.get("--description")
    owner = args.get("--owner", "data-engineering")
    validate_types = args.get("--validate-types", False)

    # Imported only after validation so that bad arguments fail fast.
    from .endpoint import get_data

    def _fetch_table(guid):
        # Read one entity by GUID through the sync client.
        return get_data({
            "app": "catalog",
            "method": "GET",
            "endpoint": f"/datamap/api/atlas/v2/entity/guid/{guid}",
            "params": get_api_version_params("datamap")
        })

    def _is_error(response):
        # Anything that is not a dict, or a dict flagged as an error,
        # cannot be used as an API result.
        return not isinstance(response, dict) or response.get("status") == "error"

    def _columns_of(table):
        return table.get('entity', {}).get('relationshipAttributes', {}).get('columns', [])

    def _find_column(columns, wanted_name):
        # Case-insensitive match on displayText; tolerates a None displayText.
        wanted = wanted_name.lower()
        for col in columns:
            if (col.get('displayText') or '').lower() == wanted:
                return col
        return None

    def _target_failure(index, table_guid, column_name, message):
        return {
            "target_index": index,
            "target_table_guid": table_guid,
            "target_column": column_name,
            "status": "error",
            "message": message
        }

    # ---- Step 1: resolve the source column ----
    source_table = _fetch_table(source_table_guid)
    if not source_table or _is_error(source_table):
        return {"status": "error", "message": f"Failed to get source table: {source_table}"}

    source_columns_list = _columns_of(source_table)
    source_column = _find_column(source_columns_list, source_column_name)
    if source_column is None:
        available_cols = [c.get('displayText') for c in source_columns_list]
        return {"status": "error", "message": f"Source column '{source_column_name}' not found. Available: {available_cols}"}

    source_column_guid = source_column['guid']
    source_data_type = source_column.get('attributes', {}).get('dataType', 'unknown')

    # ---- Step 2: build one Process + two relationships per target ----
    results = []
    all_entities = []
    all_relationships = []
    # Process placeholders occupy -1 .. -N (N = number of targets), so the
    # relationship placeholders start right below that range. Starting at a
    # fixed -2 would reuse a process placeholder as soon as N > 1.
    next_relationship_guid = -(len(target_table_guids) + 1)

    for idx, (target_table_guid, target_column_name) in enumerate(zip(target_table_guids, target_columns)):
        target_table = _fetch_table(target_table_guid)
        if not target_table or _is_error(target_table):
            results.append(_target_failure(
                idx, target_table_guid, target_column_name,
                f"Failed to get target table: {target_table}"
            ))
            continue

        target_columns_list = _columns_of(target_table)
        target_column = _find_column(target_columns_list, target_column_name)
        if target_column is None:
            available_cols = [c.get('displayText') for c in target_columns_list]
            results.append(_target_failure(
                idx, target_table_guid, target_column_name,
                f"Target column '{target_column_name}' not found. Available: {available_cols}"
            ))
            continue

        target_column_guid = target_column['guid']
        target_data_type = target_column.get('attributes', {}).get('dataType', 'unknown')

        if validate_types and not self._are_types_compatible(source_data_type, target_data_type):
            results.append(_target_failure(
                idx, target_table_guid, target_column_name,
                f"Type mismatch: source '{source_data_type}' not compatible with target '{target_data_type}'"
            ))
            continue

        process_guid = f"-{idx + 1}"  # -1, -2, -3, ... one placeholder per target
        qualified_name = f"ColumnMapping_{source_column_name}_{source_table_guid}_to_{target_column_name}_{target_table_guid}@default"
        final_process_name = process_name if process_name else f"{source_column_name}_to_{target_column_name}_Mapping"
        final_description = description if description else f"Column lineage: {source_column_name} -> {target_column_name}"

        all_entities.append({
            "guid": process_guid,
            "typeName": "Process",
            "attributes": {
                "qualifiedName": qualified_name,
                "name": final_process_name,
                "description": final_description,
                "owner": owner,
                "inputs": [{"guid": source_column_guid, "typeName": "column"}],
                "outputs": [{"guid": target_column_guid, "typeName": "column"}]
            },
            "classifications": [],
            "meanings": []
        })

        all_relationships.append({
            "guid": str(next_relationship_guid),
            "typeName": "dataset_process_inputs",
            "end1": {
                "guid": source_column_guid,
                "typeName": "column"
            },
            "end2": {
                "guid": process_guid,
                "typeName": "Process"
            }
        })
        next_relationship_guid -= 1

        all_relationships.append({
            "guid": str(next_relationship_guid),
            "typeName": "process_dataset_outputs",
            "end1": {
                "guid": process_guid,
                "typeName": "Process"
            },
            "end2": {
                "guid": target_column_guid,
                "typeName": "column"
            }
        })
        next_relationship_guid -= 1

        results.append({
            "target_index": idx,
            "target_table_guid": target_table_guid,
            "target_column": target_column_name,
            "status": "pending"
        })

    if not all_entities:
        return {
            "status": "error",
            "message": "All targets failed validation",
            "results": results
        }

    # ---- Step 3: create every lineage in a single bulk request ----
    api_result = get_data({
        "app": "catalog",
        "method": "POST",
        "endpoint": ENDPOINTS["entity"]["bulk_create_or_update"],
        "params": get_api_version_params("datamap"),
        "payload": {
            "entities": all_entities,
            "relationships": all_relationships
        }
    })

    # ---- Step 4: report the real outcome of the bulk request ----
    if _is_error(api_result):
        failure = f"Bulk lineage creation failed: {api_result}"
        for entry in results:
            if entry['status'] == 'pending':
                entry['status'] = 'error'
                entry['message'] = failure
        return {
            "status": "error",
            "message": failure,
            "created_count": 0,
            "results": results,
            "api_response": api_result
        }

    for entry in results:
        if entry['status'] == 'pending':
            entry['status'] = 'success'

    return {
        "status": "success",
        "message": f"Created {len(all_entities)} column lineage(s)",
        "created_count": len(all_entities),
        "results": results,
        "api_response": api_result
    }
|
|
2030
|
+
|
|
2031
|
+
def _are_types_compatible(self, source_type, target_type):
    """
    Decide whether a source column type may feed a target column type.

    Both names are lower-cased first; a falsy value (``None``, ``""``) is
    treated as the literal type ``'unknown'``. The pair is accepted when any
    of the following holds:

    - the two normalized names are equal,
    - either side is ``'unknown'`` (permissive approach),
    - both names belong to the same family (integer, float/decimal, string,
      date/time),
    - the source is an integer type and the target a float/decimal type.

    Args:
        source_type: Source column data type
        target_type: Target column data type

    Returns:
        Boolean indicating compatibility
    """
    integer_family = frozenset(('int', 'integer', 'bigint', 'smallint', 'tinyint', 'long'))
    decimal_family = frozenset(('float', 'double', 'decimal', 'numeric', 'real'))
    text_family = frozenset(('string', 'varchar', 'char', 'text', 'nvarchar', 'nchar'))
    temporal_family = frozenset(('date', 'datetime', 'datetime2', 'timestamp', 'time'))

    def _normalise(raw):
        return raw.lower() if raw else 'unknown'

    src, dst = _normalise(source_type), _normalise(target_type)

    # Identical names and unknown types never need a family lookup.
    if src == dst or 'unknown' in (src, dst):
        return True

    families = (integer_family, decimal_family, text_family, temporal_family)
    same_family = any(src in family and dst in family for family in families)

    # Integer -> float/decimal is the only accepted cross-family promotion;
    # the reverse direction is rejected.
    promotion = src in integer_family and dst in decimal_family

    return same_family or promotion
|
|
2079
|
+
|
|
2080
|
+
@decorator
|
|
2081
|
+
def lineageCreateDirect(self, args):
|
|
2082
|
+
"""
|
|
2083
|
+
Create direct lineage between two datasets (UI-style lineage without visible Process).
|
|
2084
|
+
|
|
2085
|
+
This creates a direct_lineage_dataset_dataset relationship, which is what Purview UI uses
|
|
2086
|
+
when you manually create lineage. The Process is created internally but hidden in the UI.
|
|
2087
|
+
|
|
2088
|
+
Args:
|
|
2089
|
+
args: Dictionary with keys:
|
|
2090
|
+
--source-guid: Source entity GUID
|
|
2091
|
+
--target-guid: Target entity GUID
|
|
2092
|
+
--source-type: Source entity type (e.g., azure_sql_table)
|
|
2093
|
+
--target-type: Target entity type (e.g., azure_sql_table)
|
|
2094
|
+
--column-mapping: Optional column mapping JSON string
|
|
2095
|
+
|
|
2096
|
+
Returns:
|
|
2097
|
+
Created relationship details
|
|
2098
|
+
|
|
2099
|
+
Example:
|
|
2100
|
+
client = Lineage()
|
|
2101
|
+
result = client.lineageCreateDirect({
|
|
2102
|
+
"--source-guid": "9ebbd583-4987-4d1b-b4f5-d8f6f6f60000",
|
|
2103
|
+
"--target-guid": "52c7d566-87ab-4753-a23a-d3f6f6f60000",
|
|
2104
|
+
"--source-type": "azure_sql_table",
|
|
2105
|
+
"--target-type": "azure_sql_table",
|
|
2106
|
+
"--column-mapping": ""
|
|
2107
|
+
})
|
|
2108
|
+
"""
|
|
2109
|
+
source_guid = args.get("--source-guid")
|
|
2110
|
+
target_guid = args.get("--target-guid")
|
|
2111
|
+
source_type = args.get("--source-type", "DataSet")
|
|
2112
|
+
target_type = args.get("--target-type", "DataSet")
|
|
2113
|
+
column_mapping = args.get("--column-mapping", "")
|
|
2114
|
+
|
|
2115
|
+
if not source_guid or not target_guid:
|
|
2116
|
+
raise ValueError("Both --source-guid and --target-guid are required")
|
|
2117
|
+
|
|
2118
|
+
# Create direct lineage relationship (UI-style)
|
|
2119
|
+
relationship = {
|
|
2120
|
+
"typeName": "direct_lineage_dataset_dataset",
|
|
2121
|
+
"guid": "-1", # Let Atlas generate
|
|
2122
|
+
"end1": {
|
|
2123
|
+
"guid": source_guid,
|
|
2124
|
+
"typeName": source_type
|
|
2125
|
+
},
|
|
2126
|
+
"end2": {
|
|
2127
|
+
"guid": target_guid,
|
|
2128
|
+
"typeName": target_type
|
|
2129
|
+
},
|
|
2130
|
+
"attributes": {
|
|
2131
|
+
"columnMapping": column_mapping
|
|
2132
|
+
}
|
|
2133
|
+
}
|
|
2134
|
+
|
|
2135
|
+
self.method = "POST"
|
|
2136
|
+
self.endpoint = ENDPOINTS["relationship"]["create"]
|
|
2137
|
+
self.params = get_api_version_params("datamap")
|
|
2138
|
+
self.payload = relationship
|