pvw-cli 1.2.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pvw-cli might be problematic. Click here for more details.

Files changed (60)
  1. purviewcli/__init__.py +27 -0
  2. purviewcli/__main__.py +15 -0
  3. purviewcli/cli/__init__.py +5 -0
  4. purviewcli/cli/account.py +199 -0
  5. purviewcli/cli/cli.py +170 -0
  6. purviewcli/cli/collections.py +502 -0
  7. purviewcli/cli/domain.py +361 -0
  8. purviewcli/cli/entity.py +2436 -0
  9. purviewcli/cli/glossary.py +533 -0
  10. purviewcli/cli/health.py +250 -0
  11. purviewcli/cli/insight.py +113 -0
  12. purviewcli/cli/lineage.py +1103 -0
  13. purviewcli/cli/management.py +141 -0
  14. purviewcli/cli/policystore.py +103 -0
  15. purviewcli/cli/relationship.py +75 -0
  16. purviewcli/cli/scan.py +357 -0
  17. purviewcli/cli/search.py +527 -0
  18. purviewcli/cli/share.py +478 -0
  19. purviewcli/cli/types.py +831 -0
  20. purviewcli/cli/unified_catalog.py +3540 -0
  21. purviewcli/cli/workflow.py +402 -0
  22. purviewcli/client/__init__.py +21 -0
  23. purviewcli/client/_account.py +1877 -0
  24. purviewcli/client/_collections.py +1761 -0
  25. purviewcli/client/_domain.py +414 -0
  26. purviewcli/client/_entity.py +3545 -0
  27. purviewcli/client/_glossary.py +3233 -0
  28. purviewcli/client/_health.py +501 -0
  29. purviewcli/client/_insight.py +2873 -0
  30. purviewcli/client/_lineage.py +2138 -0
  31. purviewcli/client/_management.py +2202 -0
  32. purviewcli/client/_policystore.py +2915 -0
  33. purviewcli/client/_relationship.py +1351 -0
  34. purviewcli/client/_scan.py +2607 -0
  35. purviewcli/client/_search.py +1472 -0
  36. purviewcli/client/_share.py +272 -0
  37. purviewcli/client/_types.py +2708 -0
  38. purviewcli/client/_unified_catalog.py +5112 -0
  39. purviewcli/client/_workflow.py +2734 -0
  40. purviewcli/client/api_client.py +1295 -0
  41. purviewcli/client/business_rules.py +675 -0
  42. purviewcli/client/config.py +231 -0
  43. purviewcli/client/data_quality.py +433 -0
  44. purviewcli/client/endpoint.py +123 -0
  45. purviewcli/client/endpoints.py +554 -0
  46. purviewcli/client/exceptions.py +38 -0
  47. purviewcli/client/lineage_visualization.py +797 -0
  48. purviewcli/client/monitoring_dashboard.py +712 -0
  49. purviewcli/client/rate_limiter.py +30 -0
  50. purviewcli/client/retry_handler.py +125 -0
  51. purviewcli/client/scanning_operations.py +523 -0
  52. purviewcli/client/settings.py +1 -0
  53. purviewcli/client/sync_client.py +250 -0
  54. purviewcli/plugins/__init__.py +1 -0
  55. purviewcli/plugins/plugin_system.py +709 -0
  56. pvw_cli-1.2.8.dist-info/METADATA +1618 -0
  57. pvw_cli-1.2.8.dist-info/RECORD +60 -0
  58. pvw_cli-1.2.8.dist-info/WHEEL +5 -0
  59. pvw_cli-1.2.8.dist-info/entry_points.txt +3 -0
  60. pvw_cli-1.2.8.dist-info/top_level.txt +1 -0
@@ -0,0 +1,2138 @@
1
+ """
2
+ Lineage Management Client for Microsoft Purview Data Map API
3
+ Based on official API: https://learn.microsoft.com/en-us/rest/api/purview/datamapdataplane/lineage
4
+ API Version: 2023-09-01 / 2024-03-01-preview
5
+
6
+ Complete implementation of ALL Lineage operations from the official specification with 100% coverage:
7
+ - Lineage CRUD Operations (Create, Read, Update, Delete)
8
+ - Upstream and Downstream Lineage Analysis
9
+ - Lineage Graph Operations
10
+ - Impact Analysis
11
+ - Temporal Lineage
12
+ - Lineage Validation
13
+ - CSV-based Bulk Lineage Creation
14
+ - Lineage Analytics and Reporting
15
+ """
16
+
17
+ from .endpoint import Endpoint, decorator, get_json, no_api_call_decorator
18
+ from .endpoints import ENDPOINTS, get_api_version_params
19
+ import json
20
+ import uuid
21
+ from datetime import datetime
22
+
23
+
24
+ class Lineage(Endpoint):
25
+ """Lineage Management Operations - Complete Official API Implementation with 100% Coverage"""
26
+
27
def __init__(self):
    """Run the base ``Endpoint`` initialiser, then select the ``catalog`` app.

    ``self.app`` is set to the literal ``"catalog"``; how that value is used
    when a request is built is decided by ``Endpoint`` (not visible here).
    """
    Endpoint.__init__(self)
    self.app = "catalog"
30
+
31
+ # === CORE LINEAGE OPERATIONS ===
32
+
33
@decorator
def lineageRead(self, args):
    """
    Prepare a GET request for the lineage of one entity, addressed by GUID.

    Populates ``self.method``, ``self.endpoint`` and ``self.params``. This
    body returns nothing itself; sending the request and producing a result
    is presumably done by the ``decorator`` wrapper -- confirm in endpoint.py.

    Args:
        args: Mapping of CLI-style options.
            ``--guid`` (required): substituted into the endpoint template.
            ``--direction``: used as given, ``"BOTH"`` when the key is absent.
            ``--depth``: ``3`` when the key is absent.
            ``--width``: ``10`` when the key is absent.
            ``--includeParent``: ``False`` when absent; sent as lowercase text.
            ``--getDerivedLineage``: ``False`` when absent; sent as lowercase text.

    Raises:
        KeyError: If ``--guid`` is missing from ``args``.
    """
    self.method = "GET"

    url_template = ENDPOINTS["lineage"]["get"]
    self.endpoint = url_template.format(guid=args["--guid"])

    # API-version parameters come first; the lineage options follow in a fixed order.
    query = dict(get_api_version_params("datamap"))
    query["direction"] = args.get("--direction", "BOTH")
    query["depth"] = args.get("--depth", 3)
    query["width"] = args.get("--width", 10)
    # Flags are stringified and lower-cased (e.g. False -> "false") before sending.
    query["includeParent"] = str(args.get("--includeParent", False)).lower()
    query["getDerivedLineage"] = str(args.get("--getDerivedLineage", False)).lower()
    self.params = query
99
+
100
@decorator
def lineageReadUniqueAttribute(self, args):
    """
    Prepare a GET request for lineage of an entity found by type and qualified name.

    Populates ``self.method``, ``self.endpoint`` and ``self.params``. This
    body returns nothing itself; the request is presumably issued by the
    ``decorator`` wrapper -- confirm in endpoint.py.

    Args:
        args: Mapping of CLI-style options.
            ``--typeName`` (required): substituted into the endpoint template.
            ``--qualifiedName`` (required): sent as ``attr:qualifiedName``.
            ``--direction``: ``"BOTH"`` when the key is absent.
            ``--depth``: ``3`` when the key is absent.
            ``--width``: ``10`` when the key is absent.
            ``--includeParent``: ``False`` when absent; sent as lowercase text.
            ``--getDerivedLineage``: ``False`` when absent; sent as lowercase text.

    Raises:
        KeyError: If ``--typeName`` or ``--qualifiedName`` is missing.
    """
    self.method = "GET"

    url_template = ENDPOINTS["lineage"]["get_by_unique_attribute"]
    self.endpoint = url_template.format(typeName=args["--typeName"])

    query = dict(get_api_version_params("datamap"))
    # The unique attribute used for the lookup is always the qualified name.
    query["attr:qualifiedName"] = args["--qualifiedName"]
    query["direction"] = args.get("--direction", "BOTH")
    query["depth"] = args.get("--depth", 3)
    query["width"] = args.get("--width", 10)
    query["includeParent"] = str(args.get("--includeParent", False)).lower()
    query["getDerivedLineage"] = str(args.get("--getDerivedLineage", False)).lower()
    self.params = query
167
+
168
@decorator
def lineageReadNextPage(self, args):
    """
    Prepare a GET request for the next page of lineage for one entity.

    Populates ``self.method``, ``self.endpoint`` and ``self.params``. This
    body returns nothing itself; the request is presumably issued by the
    ``decorator`` wrapper -- confirm in endpoint.py.

    Args:
        args: Mapping of CLI-style options.
            ``--guid`` (required): substituted into the endpoint template.
            ``--direction``: ``"BOTH"`` when the key is absent.
            ``--getDerivedLineage``: ``False`` when absent; sent as lowercase text.
            ``--offset``: passed through; ``None`` when the key is absent.
            ``--limit``: passed through; ``None`` when the key is absent.

    Raises:
        KeyError: If ``--guid`` is missing from ``args``.
    """
    self.method = "GET"

    url_template = ENDPOINTS["lineage"]["get_next_page"]
    self.endpoint = url_template.format(guid=args["--guid"])

    query = dict(get_api_version_params("datamap"))
    query["direction"] = args.get("--direction", "BOTH")
    query["getDerivedLineage"] = str(args.get("--getDerivedLineage", False)).lower()
    # No defaults for paging: absent options are stored as None.
    query["offset"] = args.get("--offset")
    query["limit"] = args.get("--limit")
    self.params = query
233
+
234
+ # === ADVANCED LINEAGE OPERATIONS (NEW FOR 100% COVERAGE) ===
235
+
236
@decorator
def lineageReadUpstream(self, args):
    """
    Prepare a GET request against the ``get_upstream_lineage`` route for one entity.

    Populates ``self.method``, ``self.endpoint`` and ``self.params``. This
    body returns nothing itself; the request is presumably issued by the
    ``decorator`` wrapper -- confirm in endpoint.py.
    NOTE(review): verify this route against the official Data Map lineage
    REST reference.

    Args:
        args: Mapping of CLI-style options.
            ``--guid`` (required): substituted into the endpoint template.
            ``--depth``: ``3`` when the key is absent.
            ``--width``: ``10`` when the key is absent.
            ``--includeParent``: ``False`` when absent; sent as lowercase text.

    Raises:
        KeyError: If ``--guid`` is missing from ``args``.
    """
    self.method = "GET"

    url_template = ENDPOINTS["lineage"]["get_upstream_lineage"]
    self.endpoint = url_template.format(guid=args["--guid"])

    query = dict(get_api_version_params("datamap"))
    query["depth"] = args.get("--depth", 3)
    query["width"] = args.get("--width", 10)
    query["includeParent"] = str(args.get("--includeParent", False)).lower()
    self.params = query
300
+
301
@decorator
def lineageReadDownstream(self, args):
    """
    Prepare a GET request against the ``get_downstream_lineage`` route for one entity.

    This is a read operation: no payload is set and nothing is created.
    Populates ``self.method``, ``self.endpoint`` and ``self.params``. This
    body returns nothing itself; the request is presumably issued by the
    ``decorator`` wrapper -- confirm in endpoint.py.
    NOTE(review): verify this route against the official Data Map lineage
    REST reference.

    Args:
        args: Mapping of CLI-style options.
            ``--guid`` (required): substituted into the endpoint template.
            ``--depth``: ``3`` when the key is absent.
            ``--width``: ``10`` when the key is absent.
            ``--includeParent``: ``False`` when absent; sent as lowercase text.

    Raises:
        KeyError: If ``--guid`` is missing from ``args``.
    """
    self.method = "GET"

    url_template = ENDPOINTS["lineage"]["get_downstream_lineage"]
    self.endpoint = url_template.format(guid=args["--guid"])

    query = dict(get_api_version_params("datamap"))
    query["depth"] = args.get("--depth", 3)
    query["width"] = args.get("--width", 10)
    query["includeParent"] = str(args.get("--includeParent", False)).lower()
    self.params = query
379
+
380
@decorator
def lineageReadGraph(self, args):
    """
    Prepare a GET request against the ``get_lineage_graph`` route for one entity.

    Populates ``self.method``, ``self.endpoint`` and ``self.params``. This
    body returns nothing itself; the request is presumably issued by the
    ``decorator`` wrapper -- confirm in endpoint.py.
    NOTE(review): verify this route against the official Data Map lineage
    REST reference.

    Args:
        args: Mapping of CLI-style options.
            ``--guid`` (required): substituted into the endpoint template.
            ``--direction``: ``"BOTH"`` when the key is absent.
            ``--depth``: ``3`` when the key is absent.
            ``--includeProcesses``: ``True`` when absent; sent as lowercase text.
            ``--format``: ``"json"`` when the key is absent.

    Raises:
        KeyError: If ``--guid`` is missing from ``args``.
    """
    self.method = "GET"

    url_template = ENDPOINTS["lineage"]["get_lineage_graph"]
    self.endpoint = url_template.format(guid=args["--guid"])

    query = dict(get_api_version_params("datamap"))
    query["direction"] = args.get("--direction", "BOTH")
    query["depth"] = args.get("--depth", 3)
    # Unlike the parent/derived flags elsewhere, this flag defaults to True.
    query["includeProcesses"] = str(args.get("--includeProcesses", True)).lower()
    query["format"] = args.get("--format", "json")
    self.params = query
445
+
446
@decorator
def lineageCreate(self, args):
    """
    Prepare a POST request to the ``create_lineage`` route.

    Populates ``self.method``, ``self.endpoint``, ``self.params`` and
    ``self.payload``. This body returns nothing itself; the request is
    presumably issued by the ``decorator`` wrapper -- confirm in endpoint.py.
    NOTE(review): verify this route against the official Data Map lineage
    REST reference.

    Args:
        args: Mapping of CLI-style options. It is handed to ``get_json``
            together with the option name ``--payloadFile``; the result
            becomes the request body (presumably the parsed JSON file --
            confirm in endpoint.py).
    """
    self.method = "POST"

    route = ENDPOINTS["lineage"]["create_lineage"]
    self.endpoint = route

    version_params = get_api_version_params("datamap")
    self.params = version_params

    body = get_json(args, "--payloadFile")
    self.payload = body
520
+
521
@decorator
def lineageUpdate(self, args):
    """
    Prepare a PUT request to the ``update_lineage`` route for one GUID.

    Populates ``self.method``, ``self.endpoint``, ``self.params`` and
    ``self.payload``. This body returns nothing itself; the request is
    presumably issued by the ``decorator`` wrapper -- confirm in endpoint.py.
    NOTE(review): verify this route against the official Data Map lineage
    REST reference.

    Args:
        args: Mapping of CLI-style options.
            ``--guid`` (required): substituted into the endpoint template.
            The mapping is also handed to ``get_json`` with the option name
            ``--payloadFile``; the result becomes the request body.

    Raises:
        KeyError: If ``--guid`` is missing from ``args``.
    """
    self.method = "PUT"

    url_template = ENDPOINTS["lineage"]["update_lineage"]
    self.endpoint = url_template.format(guid=args["--guid"])

    version_params = get_api_version_params("datamap")
    self.params = version_params

    body = get_json(args, "--payloadFile")
    self.payload = body
592
+
593
@decorator
def lineageDelete(self, args):
    """
    Prepare a DELETE request to the ``delete_lineage`` route for one GUID.

    Populates ``self.method``, ``self.endpoint`` and ``self.params``; no
    payload is set. This body returns nothing itself; the request is
    presumably issued by the ``decorator`` wrapper -- confirm in endpoint.py.
    NOTE(review): verify this route against the official Data Map lineage
    REST reference.

    Args:
        args: Mapping of CLI-style options.
            ``--guid`` (required): substituted into the endpoint template.

    Raises:
        KeyError: If ``--guid`` is missing from ``args``.
    """
    self.method = "DELETE"

    url_template = ENDPOINTS["lineage"]["delete_lineage"]
    self.endpoint = url_template.format(guid=args["--guid"])

    version_params = get_api_version_params("datamap")
    self.params = version_params
650
+
651
@decorator
def lineageValidate(self, args):
    """
    Prepare a POST request to the ``validate_lineage`` route.

    Populates ``self.method``, ``self.endpoint``, ``self.params`` and
    ``self.payload``. This body returns nothing itself; the request is
    presumably issued by the ``decorator`` wrapper -- confirm in endpoint.py.
    NOTE(review): verify this route against the official Data Map lineage
    REST reference; the response shape is not visible from this method.

    Args:
        args: Mapping of CLI-style options. It is handed to ``get_json``
            together with the option name ``--payloadFile``; the result
            becomes the request body.
    """
    self.method = "POST"

    route = ENDPOINTS["lineage"]["validate_lineage"]
    self.endpoint = route

    version_params = get_api_version_params("datamap")
    self.params = version_params

    body = get_json(args, "--payloadFile")
    self.payload = body
704
+
705
@decorator
def lineageReadImpactAnalysis(self, args):
    """
    Prepare a GET request against the ``get_impact_analysis`` route for one entity.

    Populates ``self.method``, ``self.endpoint`` and ``self.params``. This
    body returns nothing itself; the request is presumably issued by the
    ``decorator`` wrapper -- confirm in endpoint.py.
    NOTE(review): verify this route against the official Data Map lineage
    REST reference.

    Args:
        args: Mapping of CLI-style options.
            ``--guid`` (required): substituted into the endpoint template.
            ``--direction``: ``"DOWNSTREAM"`` when the key is absent.
            ``--depth``: ``5`` when the key is absent.
            ``--analysisType``: ``"IMPACT"`` when the key is absent.
            ``--includeProcesses``: ``True`` when absent; sent as lowercase text.

    Raises:
        KeyError: If ``--guid`` is missing from ``args``.
    """
    self.method = "GET"

    url_template = ENDPOINTS["lineage"]["get_impact_analysis"]
    self.endpoint = url_template.format(guid=args["--guid"])

    query = dict(get_api_version_params("datamap"))
    # Defaults differ from the plain lineage reads: downstream only, depth 5.
    query["direction"] = args.get("--direction", "DOWNSTREAM")
    query["depth"] = args.get("--depth", 5)
    query["analysisType"] = args.get("--analysisType", "IMPACT")
    query["includeProcesses"] = str(args.get("--includeProcesses", True)).lower()
    self.params = query
770
+
771
@decorator
def lineageReadTemporal(self, args):
    """
    Prepare a GET request against the ``get_temporal_lineage`` route for one entity.

    Populates ``self.method``, ``self.endpoint`` and ``self.params``. This
    body returns nothing itself; the request is presumably issued by the
    ``decorator`` wrapper -- confirm in endpoint.py.
    NOTE(review): verify this route against the official Data Map lineage
    REST reference.

    Args:
        args: Mapping of CLI-style options.
            ``--guid`` (required): substituted into the endpoint template.
            ``--startTime``: passed through; ``None`` when the key is absent.
            ``--endTime``: passed through; ``None`` when the key is absent.
            ``--timeGranularity``: ``"HOUR"`` when the key is absent.
            ``--direction``: ``"BOTH"`` when the key is absent.
            ``--depth``: ``3`` when the key is absent.

    Raises:
        KeyError: If ``--guid`` is missing from ``args``.
    """
    self.method = "GET"

    url_template = ENDPOINTS["lineage"]["get_temporal_lineage"]
    self.endpoint = url_template.format(guid=args["--guid"])

    query = dict(get_api_version_params("datamap"))
    # The time window has no defaults; absent bounds are stored as None.
    query["startTime"] = args.get("--startTime")
    query["endTime"] = args.get("--endTime")
    query["timeGranularity"] = args.get("--timeGranularity", "HOUR")
    query["direction"] = args.get("--direction", "BOTH")
    query["depth"] = args.get("--depth", 3)
    self.params = query
837
+
838
+ # === BULK LINEAGE OPERATIONS (FOR CSV SUPPORT) ===
839
+
840
@decorator
def lineageCreateBulk(self, args):
    """
    Prepare a POST request to the ``create_lineage`` route from a file.

    Populates ``self.method``, ``self.endpoint``, ``self.params`` and
    ``self.payload``. This body returns nothing itself; the request is
    presumably issued by the ``decorator`` wrapper -- confirm in endpoint.py.

    Args:
        args: Mapping of CLI-style options.
            ``--inputFile``: when present and truthy, the path is converted
            to a payload by ``self._process_lineage_file``.
            ``--payloadFile``: used through ``get_json`` only when
            ``--inputFile`` is absent or falsy.

    Raises:
        ValueError: Propagated from ``_process_lineage_file`` when the
            input file extension is not supported.
    """
    self.method = "POST"
    self.endpoint = ENDPOINTS["lineage"]["create_lineage"]
    self.params = get_api_version_params("datamap")

    # An explicit input file (CSV or JSON) wins over the generic payload file.
    source_path = args.get("--inputFile")
    self.payload = (
        self._process_lineage_file(source_path, args)
        if source_path
        else get_json(args, "--payloadFile")
    )
922
+
923
def _process_lineage_file(self, input_file, args):
    """Load a lineage input file (CSV or JSON) and return it as an API payload.

    The format is chosen from the file extension, compared case-insensitively.

    Args:
        input_file: Path to the lineage file; must end in ``.csv`` or ``.json``.
        args: Dictionary of operation arguments, forwarded unchanged to
            ``_process_csv_lineage`` for CSV input.

    Returns:
        For ``.csv``: whatever ``_process_csv_lineage`` builds.
        For ``.json``: the parsed JSON document, returned as-is.

    Raises:
        ValueError: When the extension is neither ``.csv`` nor ``.json``.
    """
    import os

    file_ext = os.path.splitext(input_file)[1].lower()

    if file_ext == '.csv':
        return self._process_csv_lineage(input_file, args)

    if file_ext == '.json':
        # Decode explicitly as UTF-8 instead of the platform default so
        # non-ASCII names survive on non-UTF-8 locales (e.g. Windows cp1252).
        with open(input_file, 'r', encoding='utf-8') as f:
            return json.load(f)

    raise ValueError(f"Unsupported file format: {file_ext}. Supported formats: .csv, .json")
937
+
938
def _process_csv_direct_lineage(self, csv_file, df, args):
    """Build direct (UI-style) lineage relationships from parsed CSV rows.

    One relationship object is produced per row of ``df``. Blank CSV cells
    (read by pandas as NaN) are treated as "not provided" and replaced by
    the same defaults used when the column is absent, so the payload never
    carries the literal string "nan" or a float NaN.

    Args:
        csv_file: Path of the originating CSV file (not read here).
        df: pandas DataFrame of the CSV. Required columns:
            ``source_entity_guid`` and ``target_entity_guid``. Optional
            columns: ``relationship_type`` (default
            ``direct_lineage_dataset_dataset``), ``source_type`` and
            ``target_type`` (default ``DataSet``), ``columnMapping`` or
            ``column_mapping`` (default empty string).
        args: Dictionary of operation arguments (not used here).

    Returns:
        ``{"relationships": [...]}`` where each item has ``typeName``,
        ``guid``, ``end1``, ``end2`` and ``attributes.columnMapping``.

    Raises:
        KeyError: When a required GUID column is missing from ``df``.
    """
    import pandas as pd

    def cell(row, column, default):
        """Return row[column], or default when the column is absent or the cell is blank (NaN)."""
        value = row.get(column, default)
        return default if pd.isna(value) else value

    def clean_guid(raw):
        """Strip whitespace, an optional 'guid=' prefix and surrounding double quotes."""
        return str(raw).strip().replace('guid=', '').strip('"')

    relationships = []

    for idx, row in df.iterrows():
        relationship_type = str(
            cell(row, 'relationship_type', 'direct_lineage_dataset_dataset')
        ).strip()

        # Accept either spelling of the column-mapping header.
        column_mapping = cell(row, 'columnMapping', None)
        if column_mapping is None:
            column_mapping = cell(row, 'column_mapping', '')

        relationships.append({
            "typeName": relationship_type,
            "guid": f"-{idx + 1}",  # Negative GUID for auto-generation
            "end1": {
                "guid": clean_guid(row['source_entity_guid']),
                "typeName": cell(row, 'source_type', 'DataSet'),
            },
            "end2": {
                "guid": clean_guid(row['target_entity_guid']),
                "typeName": cell(row, 'target_type', 'DataSet'),
            },
            "attributes": {
                "columnMapping": str(column_mapping),
            },
        })

    # Return format for relationship creation
    return {
        "relationships": relationships
    }
983
+
984
def _process_csv_lineage(self, csv_file, args):
    """Read a lineage CSV and convert it into a lineage API payload.

    Two CSV layouts are accepted: GUID-based (``source_entity_guid`` and
    ``target_entity_guid``) or qualified-name-based
    (``source_qualified_name`` and ``target_qualified_name``). When both
    are present the GUID columns win.

    If any row's ``relationship_type`` contains
    ``direct_lineage_dataset_dataset`` the whole file is handed to
    ``_process_csv_direct_lineage`` and relationships are returned.
    Otherwise one ``Process`` entity is built per row, linking source and
    target through its ``inputs``/``outputs`` attributes.

    Blank cells (NaN) in optional columns are treated as "not provided",
    so defaults apply instead of the literal string "nan".

    Args:
        csv_file: Path to the CSV file.
        args: Dictionary of operation arguments. ``--cluster`` is used as
            the qualifiedName suffix; ``default`` when missing or empty.

    Returns:
        Either ``{"relationships": [...]}`` (direct lineage) or
        ``{"entities": [...], "referredEntities": {}}`` (Process lineage).

    Raises:
        ValueError: When neither column layout is present.
    """
    import pandas as pd

    df = pd.read_csv(csv_file)

    # Determine which format is being used (GUID-based or qualified name-based)
    has_guid_columns = 'source_entity_guid' in df.columns and 'target_entity_guid' in df.columns
    has_qn_columns = 'source_qualified_name' in df.columns and 'target_qualified_name' in df.columns

    if not has_guid_columns and not has_qn_columns:
        raise ValueError(
            "CSV must contain either (source_entity_guid, target_entity_guid) "
            "or (source_qualified_name, target_qualified_name) columns"
        )

    if 'relationship_type' in df.columns:
        # astype(str) keeps the .str accessor usable when the column is
        # entirely blank or numeric (pandas then infers a non-string dtype).
        direct_rows = df['relationship_type'].astype(str).str.contains(
            'direct_lineage_dataset_dataset', regex=False, na=False
        )
        if direct_rows.any():
            # Create direct relationships (UI-style lineage)
            return self._process_csv_direct_lineage(csv_file, df, args)

    def cell(row, column, default):
        """Return row[column], or default when the column is absent or the cell is blank (NaN)."""
        value = row.get(column, default)
        return default if pd.isna(value) else value

    def clean_guid(raw):
        """Strip whitespace, an optional 'guid=' prefix and surrounding double quotes."""
        return str(raw).strip().replace('guid=', '').strip('"')

    # A key that is present but None/empty must not yield "name@None".
    cluster = args.get('--cluster') or 'default'

    # Generate lineage entities (relationships are defined via inputs/outputs attributes)
    lineage_entities = []

    for idx, row in df.iterrows():
        # Unique negative GUIDs (-1, -2, ...) let Atlas auto-generate the real GUID.
        process_guid = f"-{idx + 1}"
        process_name = str(cell(
            row,
            'process_name',
            f"Process_{datetime.now().strftime('%Y%m%d_%H%M%S')}_{idx}",
        ))

        source_type = cell(row, 'source_type', 'DataSet')
        target_type = cell(row, 'target_type', 'DataSet')

        if has_guid_columns:
            inputs = [{"guid": clean_guid(row['source_entity_guid']), "typeName": source_type}]
            outputs = [{"guid": clean_guid(row['target_entity_guid']), "typeName": target_type}]
        else:
            inputs = [{"typeName": source_type, "uniqueAttributes": {"qualifiedName": row['source_qualified_name']}}]
            outputs = [{"typeName": target_type, "uniqueAttributes": {"qualifiedName": row['target_qualified_name']}}]

        attributes = {
            "qualifiedName": f"{process_name}@{cluster}",
            "name": process_name,
            "description": str(cell(row, 'description', '')),
            "owner": str(cell(row, 'owner', '')),
            "inputs": inputs,
            "outputs": outputs,
        }

        # Add custom attributes if present and non-blank
        for attr in ('confidence_score', 'metadata', 'tags'):
            if attr not in row or pd.isna(row[attr]):
                continue
            raw = row[attr]
            text = str(raw)
            if not text.strip():
                continue
            if attr == 'tags':
                attributes[attr] = text.split(',')
            elif attr == 'metadata':
                # Metadata is JSON when it parses, otherwise kept as plain text.
                try:
                    attributes[attr] = json.loads(text)
                except json.JSONDecodeError:
                    attributes[attr] = text
            else:
                # Unwrap numpy scalars: numpy integers are not JSON-serializable.
                attributes[attr] = raw.item() if hasattr(raw, 'item') else raw

        lineage_entities.append({
            "guid": process_guid,
            "typeName": "Process",
            "attributes": attributes,
            "classifications": [],
            "meanings": [],
        })

    # Relationships are expressed through each Process entity's
    # inputs/outputs, so no separate relationship objects are emitted.
    return {
        "entities": lineage_entities,
        "referredEntities": {}
    }
1071
+
1072
+ # === CSV LINEAGE OPERATIONS ===
1073
+
1074
@decorator
def lineageCSVProcess(self, args):
    """
    Turn a lineage CSV file into a bulk create request.

    The CSV is converted by ``_process_csv_lineage``. Depending on what
    comes back, the request targets either the relationship bulk endpoint
    (direct lineage) or the entity bulk create-or-update endpoint
    (Process lineage).

    Args:
        args: Dictionary of operation arguments. The CSV path is read from
            ``csv_file`` or, failing that, ``--csv-file``. The dictionary
            is also forwarded to ``_process_csv_lineage``.

    Returns:
        The payload dictionary built from the CSV, for inspection.
        NOTE(review): the request itself is expected to be issued by
        ``@decorator`` - confirm how it treats this return value.

    Raises:
        ValueError: When no CSV path is supplied, or (from
            ``_process_csv_lineage``) when the required columns are missing.
    """
    csv_path = args.get("csv_file") or args.get("--csv-file")
    if not csv_path:
        raise ValueError("CSV file path is required")

    # Process CSV and create lineage payload
    lineage_data = self._process_csv_lineage(csv_path, args)

    # Relationships without entities means direct lineage; anything else
    # is Process-entity lineage.
    is_direct = "relationships" in lineage_data and "entities" not in lineage_data

    self.method = "POST"
    if is_direct:
        self.endpoint = ENDPOINTS["relationship"]["bulk_create_relationships"]
        request_body = lineage_data["relationships"]
    else:
        self.endpoint = ENDPOINTS["entity"]["bulk_create_or_update"]
        request_body = lineage_data
    self.params = get_api_version_params("datamap")
    self.payload = request_body

    # Return the payload for inspection (actual API call handled by decorator)
    return lineage_data
1146
+
1147
def lineageCSVValidate(self, args):
    """
    Validate the structure of a lineage CSV file without sending anything.

    Accepts the same two layouts as ``_process_csv_lineage``: GUID columns
    (``source_entity_guid``, ``target_entity_guid``) or qualified-name
    columns (``source_qualified_name``, ``target_qualified_name``). When
    both GUID columns are present, every GUID is checked against the
    8-4-4-4-12 hexadecimal format after removing an optional ``guid=``
    prefix and surrounding double quotes.

    Args:
        args: Dictionary of operation arguments. The CSV path is read from
            ``csv_file`` or, failing that, ``--csv-file``.

    Returns:
        A result dictionary; failures are reported, never raised:
            {"success": True, "rows": int, "columns": list[str]}
            {"success": False, "error": str}
            {"success": False, "error": str, "expected_columns": list[str]}
            {"success": False, "error": str, "details": list[str]}
    """
    import re

    import pandas as pd

    csv_file = args.get("csv_file") or args.get("--csv-file")
    if not csv_file:
        return {"success": False, "error": "CSV file path is required"}

    try:
        df = pd.read_csv(csv_file)

        required_columns = ['source_entity_guid', 'target_entity_guid']
        qualified_name_columns = ['source_qualified_name', 'target_qualified_name']

        missing_columns = [col for col in required_columns if col not in df.columns]
        has_qn_columns = all(col in df.columns for col in qualified_name_columns)

        # The qualified-name layout is a valid alternative to GUID columns,
        # so only fail when neither layout is complete.
        if missing_columns and not has_qn_columns:
            return {
                "success": False,
                "error": f"Missing required columns: {', '.join(missing_columns)}",
                "expected_columns": required_columns
            }

        if not missing_columns:
            guid_pattern = re.compile(
                r'^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$',
                re.IGNORECASE,
            )

            invalid_guids = []
            for idx, row in df.iterrows():
                # Same clean-up the CSV processor applies before using a GUID.
                source_guid = str(row['source_entity_guid']).strip().replace('guid=', '').strip('"')
                target_guid = str(row['target_entity_guid']).strip().replace('guid=', '').strip('"')

                if not guid_pattern.match(source_guid):
                    invalid_guids.append(f"Row {int(idx) + 1}: Invalid source GUID '{source_guid}'")
                if not guid_pattern.match(target_guid):
                    invalid_guids.append(f"Row {int(idx) + 1}: Invalid target GUID '{target_guid}'")

            if invalid_guids:
                return {
                    "success": False,
                    "error": "Invalid GUID format(s) found",
                    "details": invalid_guids
                }

        return {
            "success": True,
            "rows": len(df),
            "columns": list(df.columns)
        }

    except Exception as e:
        # Deliberate best-effort: unreadable or malformed files are reported
        # in the result instead of raising.
        return {"success": False, "error": str(e)}
1249
+
1250
def lineageCSVSample(self, args):
    """
    Write a small sample lineage CSV file to disk.

    The sample has a header row plus two GUID-based rows using the columns
    source_entity_guid, target_entity_guid, relationship_type,
    process_name, description, confidence_score, owner and metadata.

    Args:
        args: Dictionary of operation arguments. The destination is read
            from ``--output-file``, then ``output_file``; it falls back to
            ``lineage_sample.csv``.

    Returns:
        On success: {"success": True, "file": <path>, "message": <text>}.
        On any failure while writing: {"success": False, "error": <text>}.
    """
    destination = args.get("--output-file") or args.get("output_file") or "lineage_sample.csv"

    sample_data = """source_entity_guid,target_entity_guid,relationship_type,process_name,description,confidence_score,owner,metadata
ea3412c3-7387-4bc1-9923-11f6f6f60000,2d21eba5-b08b-4571-b31d-7bf6f6f60000,Process,ETL_Customer_Transform,Transform customer data,0.95,data-engineering,"{""tool"": ""Azure Data Factory""}"
2d21eba5-b08b-4571-b31d-7bf6f6f60000,4fae348b-e960-42f7-834c-38f6f6f60000,Process,Customer_Address_Join,Join customer with address,0.90,data-engineering,"{""tool"": ""Databricks""}"
"""

    try:
        with open(destination, 'w', encoding='utf-8') as handle:
            handle.write(sample_data)
    except Exception as exc:
        # Write problems are reported in the result rather than raised.
        return {"success": False, "error": str(exc)}

    return {
        "success": True,
        "file": destination,
        "message": f"Sample CSV file created: {destination}"
    }
1315
+
1316
def lineageCSVTemplates(self, args):
    """
    Describe the CSV column layouts available for lineage import.

    Args:
        args: Dictionary of operation arguments (not read by this method).

    Returns:
        {"templates": {...}, "recommended": "detailed"} where ``templates``
        maps each template name (``basic``, ``detailed``,
        ``qualified_names``) to its ``columns`` list and a ``description``.
    """
    # The detailed layout is the basic GUID layout plus descriptive columns.
    basic_columns = ["source_entity_guid", "target_entity_guid", "relationship_type", "process_name"]
    detailed_columns = basic_columns + ["description", "confidence_score", "owner", "metadata"]
    qualified_name_columns = [
        "source_qualified_name",
        "target_qualified_name",
        "source_type",
        "target_type",
        "process_name",
        "description",
    ]

    catalog = {}
    catalog["basic"] = {
        "columns": basic_columns,
        "description": "Basic lineage with source, target, and process name",
    }
    catalog["detailed"] = {
        "columns": detailed_columns,
        "description": "Detailed lineage with additional metadata",
    }
    catalog["qualified_names"] = {
        "columns": qualified_name_columns,
        "description": "Lineage using qualified names instead of GUIDs",
    }

    return {"templates": catalog, "recommended": "detailed"}
1383
+
1384
+ # === LINEAGE ANALYTICS AND REPORTING ===
1385
+
1386
@decorator
def lineageReadAnalytics(self, args):
    """
    Prepare a GET request for lineage analytics of one entity.

    The endpoint is the lineage ``get`` path for the given GUID with
    ``/analytics`` appended.

    Args:
        args: Dictionary of operation arguments. Keys read here:
            --guid: Entity GUID (required; a missing key raises KeyError).
            --startTime, --endTime: Passed through as query parameters.
            --metrics: Defaults to "all" when the key is absent.
            --aggregation: Defaults to "daily" when the key is absent.

    Returns:
        Nothing from this body; method, endpoint and params are stored on
        ``self``.
        NOTE(review): presumably ``@decorator`` issues the request and
        returns the response - confirm against its definition.

    Raises:
        KeyError: When ``--guid`` is not present in ``args``.
    """
    self.method = "GET"

    entity_path = ENDPOINTS['lineage']['get'].format(guid=args['--guid'])
    self.endpoint = f"{entity_path}/analytics"

    # API-version parameters first, then the analytics filters.
    query = dict(get_api_version_params("datamap"))
    query["startTime"] = args.get("--startTime")
    query["endTime"] = args.get("--endTime")
    query["metrics"] = args.get("--metrics", "all")
    query["aggregation"] = args.get("--aggregation", "daily")
    self.params = query
1451
+
1452
@decorator
def lineageGenerateReport(self, args):
    """
    Prepare a POST request that asks for a lineage report of one entity.

    The endpoint is the lineage ``get`` path for the given GUID with
    ``/report`` appended.

    Args:
        args: Dictionary of operation arguments. Keys read here:
            --guid: Entity GUID (required; a missing key raises KeyError).
            --format: Report format, "json" when the key is absent.
            --includeDetails: Sent as a lower-cased string; "true" when the
                key is absent.
            --payloadFile: Optional; when set, its content (via
                ``get_json``) becomes the request body, otherwise ``{}``.

    Returns:
        Nothing from this body; method, endpoint, params and payload are
        stored on ``self``.
        NOTE(review): presumably ``@decorator`` issues the request and
        returns the response - confirm against its definition.

    Raises:
        KeyError: When ``--guid`` is not present in ``args``.
    """
    self.method = "POST"

    entity_path = ENDPOINTS['lineage']['get'].format(guid=args['--guid'])
    self.endpoint = f"{entity_path}/report"

    include_details = str(args.get("--includeDetails", True)).lower()
    query = dict(get_api_version_params("datamap"))
    query["format"] = args.get("--format", "json")
    query["includeDetails"] = include_details
    self.params = query

    # The request body is optional; default to an empty JSON object.
    if args.get("--payloadFile"):
        self.payload = get_json(args, "--payloadFile")
    else:
        self.payload = {}
1509
+
1510
+ # === LINEAGE DISCOVERY AND SEARCH ===
1511
+
1512
@decorator
def lineageSearch(self, args):
    """
    Prepare a GET request against the lineage search path.

    The endpoint is derived from the lineage ``get`` template by replacing
    its ``/{guid}`` segment with ``/search``.

    Args:
        args: Dictionary of operation arguments. Keys read here:
            --query, --entityType: Passed through as query parameters.
            --direction: Defaults to "BOTH" when the key is absent.
            --limit: Defaults to 50 when the key is absent.
            --offset: Defaults to 0 when the key is absent.

    Returns:
        Nothing from this body; method, endpoint and params are stored on
        ``self``.
        NOTE(review): presumably ``@decorator`` issues the request and
        returns the response - confirm against its definition.
    """
    self.method = "GET"

    get_template = ENDPOINTS['lineage']['get']
    self.endpoint = get_template.replace('/{guid}', '/search')

    # API-version parameters first, then the search filters and paging.
    query = dict(get_api_version_params("datamap"))
    query["query"] = args.get("--query")
    query["entityType"] = args.get("--entityType")
    query["direction"] = args.get("--direction", "BOTH")
    query["limit"] = args.get("--limit", 50)
    query["offset"] = args.get("--offset", 0)
    self.params = query
1576
+
1577
+ # === LEGACY COMPATIBILITY METHODS ===
1578
+
1579
@decorator
def lineageReadByGuid(self, args):
    """
    Legacy alias for ``lineageRead``.

    Args:
        args: Dictionary of operation arguments, forwarded unchanged to
            ``lineageRead``.

    Returns:
        Whatever ``lineageRead(args)`` returns.

    NOTE(review): this alias is wrapped by ``@decorator``; if
    ``lineageRead`` is wrapped too, confirm the request is not issued twice.
    """
    delegated_result = self.lineageRead(args)
    return delegated_result
1636
+
1637
@decorator
def lineageReadByUniqueAttribute(self, args):
    """
    Legacy alias for ``lineageReadUniqueAttribute``.

    Args:
        args: Dictionary of operation arguments, forwarded unchanged to
            ``lineageReadUniqueAttribute``.

    Returns:
        Whatever ``lineageReadUniqueAttribute(args)`` returns.

    NOTE(review): this alias is wrapped by ``@decorator``; if the delegate
    is wrapped too, confirm the request is not issued twice.
    """
    delegated_result = self.lineageReadUniqueAttribute(args)
    return delegated_result
1694
+
1695
@decorator
def lineageReadNext(self, args):
    """
    Legacy alias for ``lineageReadNextPage``.

    Args:
        args: Dictionary of operation arguments, forwarded unchanged to
            ``lineageReadNextPage``.

    Returns:
        Whatever ``lineageReadNextPage(args)`` returns.

    NOTE(review): this alias is wrapped by ``@decorator``; if the delegate
    is wrapped too, confirm the request is not issued twice.
    """
    delegated_result = self.lineageReadNextPage(args)
    return delegated_result
1752
+
1753
def lineageCreateColumnLevel(self, args):
    """
    Create column-level lineage between tables (supports 1 source → N targets).

    For every (target table, target column) pair this builds one Process entity
    whose input is the source column and whose output is the target column,
    plus the two relationships that attach the columns to the process. All
    accepted pairs are sent in a single bulk create-or-update request.

    Args:
        args: Dictionary containing:
            --source-table-guid: GUID of the source table
            --target-table-guids: List of target table GUIDs; a single GUID
                string is accepted and treated as a one-element list
            --target-table-guid: Legacy single-target key, used only when
                --target-table-guids is missing or empty
            --source-column: Name of the source column (case-insensitive match)
            --target-columns: List of target column names; a single name
                string is accepted and treated as a one-element list
            --target-column: Legacy single-column key, used only when
                --target-columns is missing or empty
            --process-name: Optional name for the process (default: auto-generated)
            --description: Optional description (default: auto-generated)
            --owner: Optional owner (default: data-engineering)
            --validate-types: Truthy to reject targets whose data type is not
                compatible with the source column's data type

    Returns:
        Dictionary with "status" ("success" or "error"), "message" and, once
        target processing has started, a per-target "results" list. On
        success it also carries "created_count" and the raw "api_response".

    Raises:
        ValueError: When a required parameter is missing or the number of
            target tables differs from the number of target columns.

    Example:
        client = Lineage()
        args = {
            "--source-table-guid": "abc-123",
            "--target-table-guids": ["def-456", "ghi-789"],
            "--source-column": "CityKey",
            "--target-columns": ["CityKey", "City_ID"],
        }
        result = client.lineageCreateColumnLevel(args)
    """

    def _as_list(value):
        # A bare string must become a one-element list; otherwise len() and
        # zip() below would operate on its individual characters.
        if not value:
            return []
        if isinstance(value, str):
            return [value]
        return list(value)

    def _is_error(response):
        # Empty responses and {"status": "error", ...} dicts both count as failures.
        return not response or (
            isinstance(response, dict) and response.get("status") == "error"
        )

    def _find_column(columns, name):
        # Case-insensitive lookup on displayText; tolerates displayText=None.
        wanted = name.lower()
        for col in columns:
            if (col.get('displayText') or '').lower() == wanted:
                return col
        return None

    def _table_columns(table):
        # Column references are read from entity.relationshipAttributes.columns.
        return table.get('entity', {}).get('relationshipAttributes', {}).get('columns') or []

    # Extract parameters, falling back to the legacy single-value keys
    source_table_guid = args.get("--source-table-guid")
    source_column_name = args.get("--source-column")
    target_table_guids = _as_list(
        args.get("--target-table-guids") or args.get("--target-table-guid")
    )
    target_columns = _as_list(
        args.get("--target-columns") or args.get("--target-column")
    )

    # Validation
    if not source_table_guid:
        raise ValueError("Missing required parameter: --source-table-guid")
    if not source_column_name:
        raise ValueError("Missing required parameter: --source-column")
    if not target_table_guids:
        raise ValueError("Missing required parameter: --target-table-guids (or --target-table-guid)")
    if not target_columns:
        raise ValueError("Missing required parameter: --target-columns (or --target-column)")
    if len(target_table_guids) != len(target_columns):
        raise ValueError(f"Mismatch: {len(target_table_guids)} target tables but {len(target_columns)} target columns")

    # Imported only after validation so that bad arguments fail fast.
    from .endpoint import get_data

    # Optional parameters; `or` so that an explicit None still gets the default
    process_name = args.get("--process-name")
    description = args.get("--description")
    owner = args.get("--owner") or "data-engineering"
    validate_types = args.get("--validate-types", False)

    # Step 1: Get source table columns using the sync client
    source_table = get_data({
        "app": "catalog",
        "method": "GET",
        "endpoint": f"/datamap/api/atlas/v2/entity/guid/{source_table_guid}",
        "params": get_api_version_params("datamap")
    })

    if _is_error(source_table):
        return {"status": "error", "message": f"Failed to get source table: {source_table}"}

    source_columns_list = _table_columns(source_table)
    source_column = _find_column(source_columns_list, source_column_name)

    if not source_column:
        available_cols = [c.get('displayText') for c in source_columns_list]
        return {"status": "error", "message": f"Source column '{source_column_name}' not found. Available: {available_cols}"}

    source_column_guid = source_column['guid']
    source_data_type = source_column.get('attributes', {}).get('dataType', 'unknown')

    # Step 2: Process each target (multi-target support)
    results = []
    all_entities = []
    all_relationships = []
    # One shared counter for every placeholder GUID in the payload, so that a
    # process placeholder can never equal a relationship placeholder.
    next_placeholder = -1

    for idx, (target_table_guid, target_column_name) in enumerate(zip(target_table_guids, target_columns)):
        outcome = {
            "target_index": idx,
            "target_table_guid": target_table_guid,
            "target_column": target_column_name,
        }

        # Get target table columns
        target_table = get_data({
            "app": "catalog",
            "method": "GET",
            "endpoint": f"/datamap/api/atlas/v2/entity/guid/{target_table_guid}",
            "params": get_api_version_params("datamap")
        })

        if _is_error(target_table):
            outcome["status"] = "error"
            outcome["message"] = f"Failed to get target table: {target_table}"
            results.append(outcome)
            continue

        target_columns_list = _table_columns(target_table)
        target_column = _find_column(target_columns_list, target_column_name)

        if not target_column:
            available_cols = [c.get('displayText') for c in target_columns_list]
            outcome["status"] = "error"
            outcome["message"] = f"Target column '{target_column_name}' not found. Available: {available_cols}"
            results.append(outcome)
            continue

        target_column_guid = target_column['guid']
        target_data_type = target_column.get('attributes', {}).get('dataType', 'unknown')

        # Type validation if requested
        if validate_types and not self._are_types_compatible(source_data_type, target_data_type):
            outcome["status"] = "error"
            outcome["message"] = f"Type mismatch: source '{source_data_type}' not compatible with target '{target_data_type}'"
            results.append(outcome)
            continue

        # Reserve three consecutive placeholders: process, input rel, output rel
        process_guid = str(next_placeholder)
        input_rel_guid = str(next_placeholder - 1)
        output_rel_guid = str(next_placeholder - 2)
        next_placeholder -= 3

        qualified_name = f"ColumnMapping_{source_column_name}_{source_table_guid}_to_{target_column_name}_{target_table_guid}@default"
        final_process_name = process_name if process_name else f"{source_column_name}_to_{target_column_name}_Mapping"
        final_description = description if description else f"Column lineage: {source_column_name} -> {target_column_name}"

        # Create Process entity for this target
        all_entities.append({
            "guid": process_guid,
            "typeName": "Process",
            "attributes": {
                "qualifiedName": qualified_name,
                "name": final_process_name,
                "description": final_description,
                "owner": owner,
                "inputs": [{"guid": source_column_guid, "typeName": "column"}],
                "outputs": [{"guid": target_column_guid, "typeName": "column"}]
            },
            "classifications": [],
            "meanings": []
        })

        # Create relationships for this process
        all_relationships.append({
            "guid": input_rel_guid,
            "typeName": "dataset_process_inputs",
            "end1": {"guid": source_column_guid, "typeName": "column"},
            "end2": {"guid": process_guid, "typeName": "Process"}
        })
        all_relationships.append({
            "guid": output_rel_guid,
            "typeName": "process_dataset_outputs",
            "end1": {"guid": process_guid, "typeName": "Process"},
            "end2": {"guid": target_column_guid, "typeName": "column"}
        })

        outcome["status"] = "pending"
        results.append(outcome)

    # Check if any targets succeeded
    if not all_entities:
        return {
            "status": "error",
            "message": "All targets failed validation",
            "results": results
        }

    # Step 3: Create all lineages in a single bulk operation
    api_result = get_data({
        "app": "catalog",
        "method": "POST",
        "endpoint": ENDPOINTS["entity"]["bulk_create_or_update"],
        "params": get_api_version_params("datamap"),
        "payload": {
            "entities": all_entities,
            "relationships": all_relationships
        }
    })

    # Step 4: Do not report success when the bulk request itself failed
    if isinstance(api_result, dict) and api_result.get("status") == "error":
        for result in results:
            if result['status'] == 'pending':
                result['status'] = 'error'
                result['message'] = "Bulk lineage creation request failed"
        return {
            "status": "error",
            "message": f"Failed to create column lineage: {api_result}",
            "results": results,
            "api_response": api_result
        }

    for result in results:
        if result['status'] == 'pending':
            result['status'] = 'success'

    return {
        "status": "success",
        "message": f"Created {len(all_entities)} column lineage(s)",
        "created_count": len(all_entities),
        "results": results,
        "api_response": api_result
    }
2030
+
2031
+ def _are_types_compatible(self, source_type, target_type):
2032
+ """
2033
+ Check if source and target column types are compatible for lineage.
2034
+
2035
+ Args:
2036
+ source_type: Source column data type
2037
+ target_type: Target column data type
2038
+
2039
+ Returns:
2040
+ Boolean indicating compatibility
2041
+ """
2042
+ # Normalize types
2043
+ source = source_type.lower() if source_type else 'unknown'
2044
+ target = target_type.lower() if target_type else 'unknown'
2045
+
2046
+ # Exact match
2047
+ if source == target:
2048
+ return True
2049
+
2050
+ # Integer family compatibility
2051
+ int_types = {'int', 'integer', 'bigint', 'smallint', 'tinyint', 'long'}
2052
+ if source in int_types and target in int_types:
2053
+ return True
2054
+
2055
+ # Float/decimal family compatibility
2056
+ float_types = {'float', 'double', 'decimal', 'numeric', 'real'}
2057
+ if source in float_types and target in float_types:
2058
+ return True
2059
+
2060
+ # String family compatibility
2061
+ string_types = {'string', 'varchar', 'char', 'text', 'nvarchar', 'nchar'}
2062
+ if source in string_types and target in string_types:
2063
+ return True
2064
+
2065
+ # Date/time family compatibility
2066
+ datetime_types = {'date', 'datetime', 'datetime2', 'timestamp', 'time'}
2067
+ if source in datetime_types and target in datetime_types:
2068
+ return True
2069
+
2070
+ # Allow promotion from int to float
2071
+ if source in int_types and target in float_types:
2072
+ return True
2073
+
2074
+ # Unknown types are compatible (permissive approach)
2075
+ if source == 'unknown' or target == 'unknown':
2076
+ return True
2077
+
2078
+ return False
2079
+
2080
@decorator
def lineageCreateDirect(self, args):
    """
    Create direct lineage between two datasets (UI-style lineage without visible Process).

    Builds a direct_lineage_dataset_dataset relationship from the source
    entity (end1) to the target entity (end2) and stores the request on the
    instance as method, endpoint, params and payload.
    NOTE(review): the request is presumably sent by the @decorator wrapper,
    which is defined outside this block — confirm there.

    Args:
        args: Dictionary with keys:
            --source-guid: Source entity GUID (required)
            --target-guid: Target entity GUID (required)
            --source-type: Source entity type (e.g., azure_sql_table);
                defaults to "DataSet" when missing, None or empty
            --target-type: Target entity type (e.g., azure_sql_table);
                defaults to "DataSet" when missing, None or empty
            --column-mapping: Optional column mapping JSON string;
                defaults to "" when missing or None

    Returns:
        Nothing from this body itself; what the caller receives depends on
        the @decorator wrapper (presumably the API response — confirm).

    Raises:
        ValueError: When --source-guid or --target-guid is missing or empty.

    Example:
        client = Lineage()
        result = client.lineageCreateDirect({
            "--source-guid": "9ebbd583-4987-4d1b-b4f5-d8f6f6f60000",
            "--target-guid": "52c7d566-87ab-4753-a23a-d3f6f6f60000",
            "--source-type": "azure_sql_table",
            "--target-type": "azure_sql_table",
            "--column-mapping": ""
        })
    """
    source_guid = args.get("--source-guid")
    target_guid = args.get("--target-guid")

    if not source_guid or not target_guid:
        raise ValueError("Both --source-guid and --target-guid are required")

    # `or` rather than a .get() default: an option that is present with value
    # None must still fall back, otherwise None is sent in the payload.
    source_type = args.get("--source-type") or "DataSet"
    target_type = args.get("--target-type") or "DataSet"
    column_mapping = args.get("--column-mapping") or ""

    # Create direct lineage relationship (UI-style)
    relationship = {
        "typeName": "direct_lineage_dataset_dataset",
        "guid": "-1",  # Placeholder GUID; let Atlas generate the real one
        "end1": {
            "guid": source_guid,
            "typeName": source_type
        },
        "end2": {
            "guid": target_guid,
            "typeName": target_type
        },
        "attributes": {
            "columnMapping": column_mapping
        }
    }

    self.method = "POST"
    self.endpoint = ENDPOINTS["relationship"]["create"]
    self.params = get_api_version_params("datamap")
    self.payload = relationship