pvw-cli 1.2.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pvw-cli might be problematic. Click here for more details.

Files changed (60)
  1. purviewcli/__init__.py +27 -0
  2. purviewcli/__main__.py +15 -0
  3. purviewcli/cli/__init__.py +5 -0
  4. purviewcli/cli/account.py +199 -0
  5. purviewcli/cli/cli.py +170 -0
  6. purviewcli/cli/collections.py +502 -0
  7. purviewcli/cli/domain.py +361 -0
  8. purviewcli/cli/entity.py +2436 -0
  9. purviewcli/cli/glossary.py +533 -0
  10. purviewcli/cli/health.py +250 -0
  11. purviewcli/cli/insight.py +113 -0
  12. purviewcli/cli/lineage.py +1103 -0
  13. purviewcli/cli/management.py +141 -0
  14. purviewcli/cli/policystore.py +103 -0
  15. purviewcli/cli/relationship.py +75 -0
  16. purviewcli/cli/scan.py +357 -0
  17. purviewcli/cli/search.py +527 -0
  18. purviewcli/cli/share.py +478 -0
  19. purviewcli/cli/types.py +831 -0
  20. purviewcli/cli/unified_catalog.py +3540 -0
  21. purviewcli/cli/workflow.py +402 -0
  22. purviewcli/client/__init__.py +21 -0
  23. purviewcli/client/_account.py +1877 -0
  24. purviewcli/client/_collections.py +1761 -0
  25. purviewcli/client/_domain.py +414 -0
  26. purviewcli/client/_entity.py +3545 -0
  27. purviewcli/client/_glossary.py +3233 -0
  28. purviewcli/client/_health.py +501 -0
  29. purviewcli/client/_insight.py +2873 -0
  30. purviewcli/client/_lineage.py +2138 -0
  31. purviewcli/client/_management.py +2202 -0
  32. purviewcli/client/_policystore.py +2915 -0
  33. purviewcli/client/_relationship.py +1351 -0
  34. purviewcli/client/_scan.py +2607 -0
  35. purviewcli/client/_search.py +1472 -0
  36. purviewcli/client/_share.py +272 -0
  37. purviewcli/client/_types.py +2708 -0
  38. purviewcli/client/_unified_catalog.py +5112 -0
  39. purviewcli/client/_workflow.py +2734 -0
  40. purviewcli/client/api_client.py +1295 -0
  41. purviewcli/client/business_rules.py +675 -0
  42. purviewcli/client/config.py +231 -0
  43. purviewcli/client/data_quality.py +433 -0
  44. purviewcli/client/endpoint.py +123 -0
  45. purviewcli/client/endpoints.py +554 -0
  46. purviewcli/client/exceptions.py +38 -0
  47. purviewcli/client/lineage_visualization.py +797 -0
  48. purviewcli/client/monitoring_dashboard.py +712 -0
  49. purviewcli/client/rate_limiter.py +30 -0
  50. purviewcli/client/retry_handler.py +125 -0
  51. purviewcli/client/scanning_operations.py +523 -0
  52. purviewcli/client/settings.py +1 -0
  53. purviewcli/client/sync_client.py +250 -0
  54. purviewcli/plugins/__init__.py +1 -0
  55. purviewcli/plugins/plugin_system.py +709 -0
  56. pvw_cli-1.2.8.dist-info/METADATA +1618 -0
  57. pvw_cli-1.2.8.dist-info/RECORD +60 -0
  58. pvw_cli-1.2.8.dist-info/WHEEL +5 -0
  59. pvw_cli-1.2.8.dist-info/entry_points.txt +3 -0
  60. pvw_cli-1.2.8.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1295 @@
1
+ """
2
+ Microsoft Purview API Client
3
+ Supports the latest Microsoft Purview REST API specifications with comprehensive automation capabilities
4
+ """
5
+
6
+ import json
7
+ import asyncio
8
+ try:
9
+ import aiohttp
10
+ except Exception:
11
+ aiohttp = None
12
+ import pandas as pd
13
+ from typing import Dict, List, Optional, Union, Any
14
+ from dataclasses import dataclass
15
+ from azure.identity.aio import DefaultAzureCredential
16
+ from azure.core.exceptions import ClientAuthenticationError
17
+ import logging
18
+ from datetime import datetime
19
+ import os
20
+ import sys
21
+ from .endpoints import ENDPOINTS, DATAMAP_API_VERSION, format_endpoint, get_api_version_params
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+
26
@dataclass
class PurviewConfig:
    """Settings bundle handed to ``PurviewClient``.

    Only ``account_name`` is mandatory; every other field falls back to the
    default declared below.
    """

    # Purview account name; the client interpolates it into the endpoint host.
    account_name: str
    # Azure AD tenant / application values.
    # NOTE(review): nothing in the client code visible here reads these three
    # fields -- confirm where (or whether) they are consumed.
    tenant_id: Optional[str] = None
    client_id: Optional[str] = None
    client_secret: Optional[str] = None
    # "china" or "usgov" (case-insensitive) select sovereign-cloud endpoints;
    # any other value, including None, selects the public cloud.
    azure_region: Optional[str] = None
    # Number of attempts the client makes for each HTTP request.
    max_retries: int = 3
    # Total per-request timeout, passed to aiohttp.ClientTimeout(total=...).
    timeout: int = 30
    # Maximum number of entities sent in a single bulk request.
    batch_size: int = 100
38
+
39
+
40
+ class PurviewClient:
41
+ """Purview API Client with comprehensive automation support"""
42
+
43
def __init__(self, config: PurviewConfig):
    """Remember the configuration and derive the cloud-specific endpoints.

    No network or credential work happens here; the HTTP session, credential
    and token stay ``None`` until ``_initialize_session`` runs.

    Args:
        config: Account name, region and tuning values for this client.
    """
    # Connection state, populated later by _initialize_session().
    self._session = None
    self._credential = None
    self._token = None

    self.config = config
    self._setup_endpoints()
49
+
50
+ def _setup_endpoints(self):
51
+ """Setup API endpoints based on Azure region"""
52
+ if self.config.azure_region and self.config.azure_region.lower() == "china":
53
+ self.purview_endpoint = f"https://{self.config.account_name}.purview.azure.cn"
54
+ self.management_endpoint = "https://management.chinacloudapi.cn"
55
+ self.auth_scope = "https://purview.azure.cn/.default"
56
+ elif self.config.azure_region and self.config.azure_region.lower() == "usgov":
57
+ self.purview_endpoint = f"https://{self.config.account_name}.purview.azure.us"
58
+ self.management_endpoint = "https://management.usgovcloudapi.net"
59
+ self.auth_scope = "https://purview.azure.us/.default"
60
+ else:
61
+ self.purview_endpoint = f"https://{self.config.account_name}.purview.azure.com"
62
+ self.management_endpoint = "https://management.azure.com"
63
+ self.auth_scope = "https://purview.azure.net/.default"
64
+
65
+ async def __aenter__(self):
66
+ """Async context manager entry"""
67
+ await self._initialize_session()
68
+ return self
69
+
70
+ async def __aexit__(self, exc_type, exc_val, exc_tb):
71
+ """Async context manager exit"""
72
+ if self._session:
73
+ await self._session.close()
74
+ if self._credential:
75
+ await self._credential.close()
76
+
77
async def _initialize_session(self):
    """Authenticate and open the shared aiohttp session.

    Creates a ``DefaultAzureCredential``, requests a token for
    ``self.auth_scope`` and builds a ``ClientSession`` whose default headers
    carry that bearer token.

    Raises:
        RuntimeError: If the optional ``aiohttp`` import failed at module load.
        ClientAuthenticationError: If the credential cannot obtain a token.
    """
    if aiohttp is None:
        raise RuntimeError(
            "The 'aiohttp' package is required for Purview async operations. "
            "Install it in your environment (e.g. '.venv\\Scripts\\pip.exe install aiohttp' or 'pip install aiohttp')."
        )
    self._credential = DefaultAzureCredential()

    try:
        token = await self._credential.get_token(self.auth_scope)
    except ClientAuthenticationError as e:
        logger.error(f"Authentication failed: {e}")
        # __aexit__ does not run when __aenter__ raises, so release the
        # credential here instead of leaking it.
        await self._credential.close()
        self._credential = None
        raise
    self._token = token.token

    connector = aiohttp.TCPConnector(limit=100, limit_per_host=30)
    timeout = aiohttp.ClientTimeout(total=self.config.timeout)

    self._session = aiohttp.ClientSession(
        connector=connector,
        timeout=timeout,
        headers={
            "Authorization": f"Bearer {self._token}",
            "Content-Type": "application/json",
            "User-Agent": "pvw-cli/2.0",
        },
    )
105
+
106
async def _make_request(self, method: str, endpoint: str, **kwargs) -> Dict:
    """Send one request to the Purview endpoint and return the decoded JSON.

    Args:
        method: HTTP verb, e.g. "GET" or "POST".
        endpoint: Path appended to ``self.purview_endpoint``.
        **kwargs: Forwarded to ``aiohttp.ClientSession.request``. ``params``
            may be omitted or ``None``; it is copied, never mutated.

    Returns:
        The JSON-decoded response body.

    Raises:
        aiohttp.ClientError: Re-raised from the final failed attempt.
    """
    url = f"{self.purview_endpoint}{endpoint}"

    # Work on a copy so the caller's dict is left untouched, and only fill in
    # the Data Map version when the caller did not pick an api-version itself.
    params = dict(kwargs.get("params") or {})
    params.setdefault("api-version", DATAMAP_API_VERSION)
    kwargs["params"] = params

    # Always make at least one attempt, even if max_retries is 0 or negative;
    # otherwise the loop would be skipped and None returned silently.
    attempts = max(1, self.config.max_retries)
    for attempt in range(attempts):
        try:
            async with self._session.request(method, url, **kwargs) as response:
                response.raise_for_status()
                return await response.json()
        except aiohttp.ClientError as e:
            logger.error(f"Request failed on attempt {attempt + 1}: {e}")
            if attempt == attempts - 1:
                raise
            # A 401 will keep failing with the same bearer token, so fetch a
            # fresh one before the next attempt.
            if getattr(e, "status", None) == 401:
                await self._refresh_token()
122
+
123
+ async def _refresh_token(self):
124
+ """Refresh authentication token"""
125
+ token = await self._credential.get_token(self.auth_scope)
126
+ self._token = token.token
127
+ self._session.headers.update({"Authorization": f"Bearer {self._token}"})
128
+
129
+ # Data Map API Methods
130
async def get_entity(self, guid: str, **kwargs) -> Dict:
    """Fetch a single entity by GUID.

    Args:
        guid: GUID substituted into the ``ENDPOINTS["entity"]["get"]`` path.
        **kwargs: Sent as query parameters (e.g. minExtInfo,
            ignoreRelationships).

    Returns:
        The decoded JSON response. Per the service's entity model it is
        expected to carry guid, typeName, attributes, classifications and
        relationshipAttributes -- not validated here.

    Raises:
        aiohttp.ClientError: Propagated from ``_make_request``.

    Example:
        entity = await client.get_entity("a1b2c3d4-e5f6-7890-abcd-ef1234567890")
    """
    entity_path = format_endpoint(ENDPOINTS["entity"]["get"], guid=guid)
    response = await self._make_request("GET", entity_path, params=kwargs)
    return response
156
+
157
async def create_entity(self, entity_data: Dict) -> Dict:
    """POST an entity payload to the create-or-update endpoint.

    Args:
        entity_data: Payload sent as the JSON body, unmodified. Callers
            normally supply ``typeName`` plus an ``attributes`` dict with at
            least ``name`` and ``qualifiedName``; this method does not check.

    Returns:
        The decoded JSON response from the service.

    Raises:
        aiohttp.ClientError: Propagated from ``_make_request``.

    Example:
        entity = await client.create_entity({
            "typeName": "DataSet",
            "attributes": {"name": "Sales Data", "qualifiedName": "sales_data@tenant"},
        })
    """
    target = ENDPOINTS["entity"]["create_or_update"]
    response = await self._make_request("POST", target, json=entity_data)
    return response
189
+
190
async def update_entity(self, entity_data: Dict) -> Dict:
    """PUT an entity payload to the create-or-update endpoint.

    Args:
        entity_data: Payload sent as the JSON body, unmodified. Callers
            normally include the ``guid`` of the entity being updated together
            with ``typeName`` and the changed ``attributes``.

    Returns:
        The decoded JSON response from the service.

    Raises:
        aiohttp.ClientError: Propagated from ``_make_request``.

    Example:
        updated = await client.update_entity({
            "guid": "a1b2c3d4-e5f6-7890-abcd-ef1234567890",
            "typeName": "DataSet",
            "attributes": {"description": "Updated description"},
        })
    """
    target = ENDPOINTS["entity"]["create_or_update"]
    response = await self._make_request("PUT", target, json=entity_data)
    return response
216
+
217
async def delete_entity(self, guid: str) -> Dict:
    """Issue a DELETE for the entity with the given GUID.

    Args:
        guid: GUID substituted into the ``ENDPOINTS["entity"]["delete"]`` path.

    Returns:
        The decoded JSON response from the service.

    Raises:
        aiohttp.ClientError: Propagated from ``_make_request``.

    Warning:
        Treat the operation as destructive; this client offers no undo.

    Example:
        result = await client.delete_entity("a1b2c3d4-e5f6-7890-abcd-ef1234567890")
    """
    delete_path = format_endpoint(ENDPOINTS["entity"]["delete"], guid=guid)
    response = await self._make_request("DELETE", delete_path)
    return response
238
+
239
async def search_entities(self, query: str, **kwargs) -> Dict:
    """Run a discovery query and return the raw search response.

    Args:
        query: Search keywords.
        **kwargs: Recognised options, all optional:
            - filter: Filter criteria (sent as null when omitted).
            - facets: Facet definitions (sent as null when omitted).
            - limit: Page size, default 50.
            - offset: Pagination offset, default 0.
            Any other keyword is ignored.

    Returns:
        The decoded JSON response. Callers in this file read the matches
        from its ``value`` key.

    Raises:
        aiohttp.ClientError: Propagated from ``_make_request``.

    Example:
        results = await client.search_entities(
            "sales", filter={"typeName": "azure_sql_table"}, limit=100
        )
    """
    # (option name, default used when the caller did not pass it)
    option_defaults = (("filter", None), ("facets", None), ("limit", 50), ("offset", 0))

    payload = {"keywords": query}
    for option, default in option_defaults:
        payload[option] = kwargs.get(option, default)

    return await self._make_request("POST", ENDPOINTS["discovery"]["query"], json=payload)
276
+
277
+ # Batch Operations
278
async def batch_create_entities(
    self, entities: List[Dict], progress_callback=None
) -> List[Dict]:
    """Create entities in chunks of ``config.batch_size`` via the bulk endpoint.

    Each chunk is POSTed as ``{"entities": [...]}``. A chunk that fails for
    any reason is logged and skipped, so the call is best-effort: the result
    only contains what the service reported under
    ``mutatedEntities["CREATE"]`` for the chunks that succeeded.

    Args:
        entities: Entity payloads to create.
        progress_callback: Optional ``callback(processed, total)`` invoked
            after every successful chunk. An exception raised by the callback
            is logged as a failure of that chunk.

    Returns:
        The accumulated ``CREATE`` entries from all successful chunks.

    Raises:
        ValueError: If ``config.batch_size`` is 0 (invalid ``range`` step).

    Example:
        created = await client.batch_create_entities(
            [{"typeName": "DataSet",
              "attributes": {"name": "d1", "qualifiedName": "d1@purview"}}],
            progress_callback=lambda done, total: print(f"{done}/{total}"),
        )
    """
    created = []
    total = len(entities)
    chunk_size = self.config.batch_size

    for batch_number, start in enumerate(range(0, total, chunk_size), start=1):
        chunk = entities[start : start + chunk_size]

        try:
            response = await self._make_request(
                "POST", ENDPOINTS["entity"]["bulk_create_or_update"], json={"entities": chunk}
            )
            created.extend(response.get("mutatedEntities", {}).get("CREATE", []))

            if progress_callback:
                progress_callback(min(start + chunk_size, total), total)

        except Exception as e:
            # Best-effort: record the failure and move on to the next chunk.
            logger.error(f"Batch {batch_number} failed: {e}")
            continue

    return created
338
+
339
async def batch_update_entities(
    self, entities: List[Dict], progress_callback=None
) -> List[Dict]:
    """Update entities in chunks of ``config.batch_size`` via the bulk endpoint.

    Each chunk is PUT as ``{"entities": [...]}``. A chunk that fails for any
    reason is logged and skipped, so the call is best-effort: the result only
    contains what the service reported under ``mutatedEntities["UPDATE"]``
    for the chunks that succeeded.

    Args:
        entities: Entity payloads to update; callers normally include each
            entity's ``guid`` and the changed ``attributes``.
        progress_callback: Optional ``callback(processed, total)`` invoked
            after every successful chunk. An exception raised by the callback
            is logged as a failure of that chunk.

    Returns:
        The accumulated ``UPDATE`` entries from all successful chunks.

    Raises:
        ValueError: If ``config.batch_size`` is 0 (invalid ``range`` step).

    Example:
        updated = await client.batch_update_entities(
            [{"guid": "guid-1", "attributes": {"description": "Updated description"}}]
        )
    """
    updated = []
    total = len(entities)
    chunk_size = self.config.batch_size

    for batch_number, start in enumerate(range(0, total, chunk_size), start=1):
        chunk = entities[start : start + chunk_size]

        try:
            response = await self._make_request(
                "PUT", ENDPOINTS["entity"]["bulk_create_or_update"], json={"entities": chunk}
            )
            updated.extend(response.get("mutatedEntities", {}).get("UPDATE", []))

            if progress_callback:
                progress_callback(min(start + chunk_size, total), total)

        except Exception as e:
            # Best-effort: record the failure and move on to the next chunk.
            logger.error(f"Batch {batch_number} failed: {e}")
            continue

    return updated
396
+
397
+ # CSV Import/Export Methods
398
+ async def import_entities_from_csv(self, csv_file_path: str, mapping_config: Dict) -> Dict:
399
+ """
400
+ Import entities from CSV file using column-to-attribute mapping configuration.
401
+
402
+ Args:
403
+ csv_file_path: Path to CSV file containing entity data
404
+ mapping_config: Dictionary specifying how to map CSV columns to entity attributes:
405
+ - typeName (str): Entity type for all imported entities
406
+ - attributes (dict): Mapping of CSV column names to entity attribute names
407
+
408
+ Returns:
409
+ Dict containing import results with created entity GUIDs
410
+
411
+ Raises:
412
+ FileNotFoundError: If CSV file doesn't exist
413
+ ValueError: If mapping_config is invalid or CSV has missing required columns
414
+
415
+ Example:
416
+ ```python
417
+ mapping = {
418
+ "typeName": "azure_sql_table",
419
+ "attributes": {
420
+ "table_name": "name",
421
+ "schema_name": "schema",
422
+ "table_description": "description"
423
+ }
424
+ }
425
+ results = await client.import_entities_from_csv("tables.csv", mapping)
426
+ print(f"Imported {len(results)} entities")
427
+ ```
428
+
429
+ Use Cases:
430
+ - Bulk import assets from external catalogs or CMDBs
431
+ - Migrate metadata from legacy systems to Purview
432
+ - Load entity data from Excel/CSV exports
433
+ - Automate asset registration from data discovery tools
434
+ """
435
+ df = pd.read_csv(csv_file_path)
436
+ entities = []
437
+
438
+ for _, row in df.iterrows():
439
+ entity = self._map_csv_row_to_entity(row, mapping_config)
440
+ if entity:
441
+ entities.append(entity)
442
+
443
+ return await self.batch_create_entities(entities)
444
+
445
+ async def export_entities_to_csv(
446
+ self, query: str, csv_file_path: str, columns: List[str] = None
447
+ ) -> str:
448
+ """
449
+ Export entities matching search query to CSV file.
450
+
451
+ Args:
452
+ query: Search query to find entities (e.g., "*" for all, "type:DataSet" for specific type)
453
+ csv_file_path: Output CSV file path
454
+ columns: Optional list of column names to include (default: all available columns)
455
+
456
+ Returns:
457
+ String message confirming export with count of exported entities
458
+
459
+ Raises:
460
+ PurviewException: If search fails
461
+ IOError: If unable to write CSV file
462
+
463
+ Example:
464
+ ```python
465
+ # Export all DataSet entities
466
+ message = await client.export_entities_to_csv(
467
+ "type:DataSet",
468
+ "datasets.csv",
469
+ columns=["guid", "name", "typeName", "attr_owner"]
470
+ )
471
+ print(message) # "Exported 150 entities to datasets.csv"
472
+ ```
473
+
474
+ Use Cases:
475
+ - Extract metadata for reporting and analysis
476
+ - Create backups of entity metadata
477
+ - Share asset information with stakeholders via CSV
478
+ - Generate data catalogs for external consumption
479
+ """
480
+ search_results = await self.search_entities(query, limit=1000)
481
+ entities = search_results.get("value", [])
482
+
483
+ if not entities:
484
+ return "No entities found"
485
+
486
+ # Convert entities to DataFrame
487
+ flattened_data = []
488
+ for entity in entities:
489
+ flat_entity = self._flatten_entity(entity)
490
+ flattened_data.append(flat_entity)
491
+
492
+ df = pd.DataFrame(flattened_data)
493
+
494
+ if columns:
495
+ df = df[columns] if all(col in df.columns for col in columns) else df
496
+
497
+ df.to_csv(csv_file_path, index=False)
498
+ return f"Exported {len(entities)} entities to {csv_file_path}"
499
+
500
+ def _map_csv_row_to_entity(self, row: pd.Series, mapping_config: Dict) -> Dict:
501
+ """Map CSV row to Purview entity format"""
502
+ try:
503
+ entity = {"typeName": mapping_config.get("typeName", "DataSet"), "attributes": {}}
504
+
505
+ # Map CSV columns to entity attributes
506
+ for csv_col, attr_name in mapping_config.get("attributes", {}).items():
507
+ if csv_col in row and pd.notna(row[csv_col]):
508
+ entity["attributes"][attr_name] = row[csv_col]
509
+
510
+ # Add required attributes if not present
511
+ if "name" not in entity["attributes"] and "name" in row:
512
+ entity["attributes"]["name"] = row["name"]
513
+
514
+ if "qualifiedName" not in entity["attributes"]:
515
+ entity["attributes"][
516
+ "qualifiedName"
517
+ ] = f"{row.get('name', 'unnamed')}@{self.config.account_name}"
518
+
519
+ return entity
520
+ except Exception as e:
521
+ logger.error(f"Failed to map row to entity: {e}")
522
+ return None
523
+
524
+ def _flatten_entity(self, entity: Dict) -> Dict:
525
+ """Flatten entity structure for CSV export"""
526
+ flat = {
527
+ "guid": entity.get("guid"),
528
+ "typeName": entity.get("typeName"),
529
+ "status": entity.get("status"),
530
+ }
531
+
532
+ # Flatten attributes
533
+ attributes = entity.get("attributes", {})
534
+ for key, value in attributes.items():
535
+ if isinstance(value, (str, int, float, bool)):
536
+ flat[f"attr_{key}"] = value
537
+ elif isinstance(value, list) and value:
538
+ flat[f"attr_{key}"] = ", ".join(str(v) for v in value)
539
+
540
+ return flat # Glossary Operations
541
+
542
async def get_glossary_terms(self, glossary_guid: str = None) -> List[Dict]:
    """Fetch glossary data, optionally scoped by a GUID.

    Args:
        glossary_guid: When truthy, it is appended to
            ``ENDPOINTS["glossary"]["terms"]``; otherwise
            ``ENDPOINTS["glossary"]["base"]`` is requested.

    Returns:
        The decoded JSON response. The annotation promises a list of term
        dicts (guid, name, qualifiedName, status, ...); the shape is not
        checked here.

    Raises:
        aiohttp.ClientError: Propagated from ``_make_request``.

    Example:
        all_terms = await client.get_glossary_terms()
        scoped = await client.get_glossary_terms("glossary-guid-123")
    """
    endpoint = (
        f"{ENDPOINTS['glossary']['terms']}/{glossary_guid}"
        if glossary_guid
        else ENDPOINTS["glossary"]["base"]
    )
    return await self._make_request("GET", endpoint)
578
+
579
async def create_glossary_term(self, term_data: Dict) -> Dict:
    """POST a new glossary term.

    Args:
        term_data: Term payload sent as the JSON body, unmodified. Callers
            normally supply ``name`` and ``glossaryGuid`` plus optional
            fields such as definition, abbreviation, status, examples,
            resources, contacts and attributes; nothing is validated here.

    Returns:
        The decoded JSON response from the service.

    Raises:
        aiohttp.ClientError: Propagated from ``_make_request``.

    Example:
        term = await client.create_glossary_term({
            "name": "Customer",
            "glossaryGuid": "glossary-guid-123",
            "definition": "An individual or organization that purchases goods or services",
        })
    """
    term_endpoint = ENDPOINTS["glossary"]["term"]
    response = await self._make_request("POST", term_endpoint, json=term_data)
    return response
616
+
617
async def assign_term_to_entities(self, term_guid: str, entity_guids: List[str]) -> Dict:
    """Attach one glossary term to several entities.

    The term GUID is sent both in the URL path and in the request body.

    Args:
        term_guid: GUID of the glossary term to assign.
        entity_guids: GUIDs of the entities to tag with the term.

    Returns:
        The decoded JSON response from the service.

    Raises:
        aiohttp.ClientError: Propagated from ``_make_request``.

    Example:
        result = await client.assign_term_to_entities(
            term_guid="term-guid-abc",
            entity_guids=["table-guid-1", "table-guid-2"],
        )
    """
    endpoint = f"{ENDPOINTS['glossary']['term_assigned_entities']}/{term_guid}"
    body = {"termGuid": term_guid, "entityGuids": entity_guids}
    return await self._make_request("POST", endpoint, json=body)
650
+
651
+ # Data Estate Insights
652
+ async def get_asset_distribution(self) -> Dict:
653
+ """
654
+ Get asset distribution insights across the Purview data estate.
655
+
656
+ Returns:
657
+ Dict containing asset distribution statistics including:
658
+ - asset counts by type (DataSet, Table, Column, etc.)
659
+ - asset counts by classification
660
+ - asset counts by collection
661
+ - asset counts by source type
662
+
663
+ Raises:
664
+ PurviewException: If the request fails or API endpoint is unavailable
665
+
666
+ Example:
667
+ ```python
668
+ distribution = await client.get_asset_distribution()
669
+ print(f"Total assets: {distribution.get('totalAssets', 0)}")
670
+ for asset_type, count in distribution.get('assetsByType', {}).items():
671
+ print(f"{asset_type}: {count}")
672
+ ```
673
+
674
+ Use Cases:
675
+ - Generate data estate overview dashboards
676
+ - Monitor asset growth and distribution trends
677
+ - Identify collections with the most assets
678
+ - Create reports on data source coverage
679
+ """
680
+ return await self._make_request("GET", "/mapanddiscover/api/browse")
681
+
682
+ # === ACCOUNT MANAGEMENT (Official API Operations) === async def get_account_properties(self) -> Dict:
683
+ """Get Account Properties - Official API Operation"""
684
+ params = get_api_version_params("account")
685
+ return await self._make_request("GET", ENDPOINTS["account"]["account"], params=params)
686
+
687
async def update_account_properties(self, account_data: Dict) -> Dict:
    """PATCH the Purview account with the supplied properties.

    Args:
        account_data: Properties sent as the JSON body, unmodified. Typical
            keys are friendlyName, publicNetworkAccess ("Enabled" or
            "Disabled"), managedResourceGroupName and tags; nothing is
            validated here.

    Returns:
        The decoded JSON response from the service.

    Raises:
        aiohttp.ClientError: Propagated from ``_make_request``.

    Example:
        updated = await client.update_account_properties({
            "friendlyName": "Production Data Catalog",
            "tags": {"environment": "production"},
        })
    """
    version_params = get_api_version_params("account")
    update_endpoint = ENDPOINTS["account"]["account_update"]
    return await self._make_request(
        "PATCH", update_endpoint, json=account_data, params=version_params
    )
728
+
729
async def get_access_keys(self) -> Dict:
    """Retrieve the account's access keys.

    Sends a body-less POST to ``ENDPOINTS["account"]["access_keys"]``.

    Returns:
        The decoded JSON response. The original documentation lists
        ``atlasKafkaPrimaryEndpoint`` and ``atlasKafkaSecondaryEndpoint`` as
        its keys -- not validated here.

    Raises:
        aiohttp.ClientError: Propagated from ``_make_request``.

    Example:
        keys = await client.get_access_keys()
        primary = keys.get("atlasKafkaPrimaryEndpoint")
    """
    version_params = get_api_version_params("account")
    keys_endpoint = ENDPOINTS["account"]["access_keys"]
    return await self._make_request("POST", keys_endpoint, params=version_params)
757
+
758
async def regenerate_access_key(self, key_data: Dict) -> Dict:
    """Ask the service to regenerate one of the account's access keys.

    Args:
        key_data: Sent as the JSON body, unmodified. Expected to contain
            ``keyType`` set to "PrimaryAtlasKafkaKey" or
            "SecondaryAtlasKafkaKey"; not validated here.

    Returns:
        The decoded JSON response from the service.

    Raises:
        aiohttp.ClientError: Propagated from ``_make_request``.

    Example:
        new_key = await client.regenerate_access_key(
            {"keyType": "PrimaryAtlasKafkaKey"}
        )
    """
    version_params = get_api_version_params("account")
    regenerate_endpoint = ENDPOINTS["account"]["regenerate_access_key"]
    return await self._make_request(
        "POST", regenerate_endpoint, json=key_data, params=version_params
    )
793
+
794
+ # === COLLECTIONS MANAGEMENT (Official API Operations) ===
795
+
796
async def list_collections(self) -> List[Dict]:
    """List the collections of the Purview account.

    Returns:
        The decoded JSON response of a GET on
        ``ENDPOINTS["collections"]["list"]``. The annotation promises a list
        of collection dicts (name, friendlyName, description,
        parentCollection, ...); the shape is not checked here.

    Raises:
        aiohttp.ClientError: Propagated from ``_make_request``.

    Example:
        collections = await client.list_collections()
        for col in collections:
            print(f"{col['friendlyName']} ({col['name']})")
    """
    version_params = get_api_version_params("collections")
    list_endpoint = ENDPOINTS["collections"]["list"]
    return await self._make_request("GET", list_endpoint, params=version_params)
820
+
821
async def get_collection(self, collection_name: str) -> Dict:
    """
    Retrieve a single collection by its unique name.

    Args:
        collection_name: The collection's unique name (not its friendlyName);
            passed to ``format_endpoint`` as ``collectionName`` to build the
            request URL.

    Returns:
        The result of the GET request as returned by ``_make_request``
        (expected to describe the collection: name, friendlyName,
        description, parent reference, provisioning state).

    Raises:
        Whatever ``_make_request`` raises; nothing is caught here.

    Example:
        collection = await client.get_collection("myorg-finance")
        print(f"Collection: {collection['friendlyName']}")
    """
    url_template = ENDPOINTS["collections"]["get"]
    target = format_endpoint(url_template, collectionName=collection_name)
    version_query = get_api_version_params("collections")
    response = await self._make_request("GET", target, params=version_query)
    return response
843
+
844
async def create_collection(self, collection_name: str, collection_data: Dict) -> Dict:
    """
    Create a collection under the given unique name.

    Sends ``collection_data`` as the JSON body of a PUT to the collections
    ``create_or_update`` endpoint. This is the same request that
    ``update_collection`` and ``create_or_update_collection`` issue, so the
    call does not itself distinguish "create" from "update".

    Args:
        collection_name: Unique collection name; substituted into the
            endpoint as ``collectionName``.
        collection_data: Collection properties to send, typically:
            - friendlyName (str): Display name
            - description (str): Optional description
            - parentCollection (dict): ``{"referenceName": "parent-name"}``

    Returns:
        The result of ``_make_request`` for the PUT (expected to be the
        stored collection).

    Raises:
        Whatever ``_make_request`` raises; nothing is caught here.

    Example:
        ```python
        collection = await client.create_collection("finance-data", {
            "friendlyName": "Finance Data Collection",
            "description": "All financial datasets and reports",
            "parentCollection": {"referenceName": "myorg"}
        })
        ```
    """
    url_template = ENDPOINTS["collections"]["create_or_update"]
    target = format_endpoint(url_template, collectionName=collection_name)
    version_query = get_api_version_params("collections")
    created = await self._make_request(
        "PUT", target, json=collection_data, params=version_query
    )
    return created
883
+
884
async def update_collection(self, collection_name: str, collection_data: Dict) -> Dict:
    """
    Update the properties of a collection.

    Sends ``collection_data`` as the JSON body of a PUT to the collections
    ``create_or_update`` endpoint -- the identical request made by
    ``create_collection``. No existence check is performed here.

    Args:
        collection_name: Unique name of the collection; substituted into the
            endpoint as ``collectionName``.
        collection_data: Properties to send, typically:
            - friendlyName (str): New display name
            - description (str): Updated description
            - parentCollection (dict): New parent reference when moving the
              collection in the hierarchy

    Returns:
        The result of ``_make_request`` for the PUT (expected to be the
        updated collection).

    Raises:
        Whatever ``_make_request`` raises; nothing is caught here.

    Example:
        ```python
        updated = await client.update_collection("finance-data", {
            "friendlyName": "Finance & Accounting Data",
            "description": "Updated: All financial and accounting datasets"
        })
        ```
    """
    url_template = ENDPOINTS["collections"]["create_or_update"]
    target = format_endpoint(url_template, collectionName=collection_name)
    version_query = get_api_version_params("collections")
    updated = await self._make_request(
        "PUT", target, json=collection_data, params=version_query
    )
    return updated
922
+
923
async def create_or_update_collection(
    self, collection_name: str, collection_data: Dict
) -> Dict:
    """
    Upsert a collection: PUT its properties under the given unique name.

    Args:
        collection_name: Unique name of the collection; substituted into the
            ``create_or_update`` endpoint as ``collectionName``.
        collection_data: Collection properties sent as the JSON body
            (friendlyName, description, parentCollection, ...).

    Returns:
        The result of ``_make_request`` for the PUT (expected to be the
        created or updated collection).

    Raises:
        Whatever ``_make_request`` raises; nothing is caught here.

    Example:
        ```python
        collection = await client.create_or_update_collection("finance-data", {
            "friendlyName": "Finance Data",
            "description": "Financial datasets"
        })
        ```
    """
    url_template = ENDPOINTS["collections"]["create_or_update"]
    target = format_endpoint(url_template, collectionName=collection_name)
    version_query = get_api_version_params("collections")
    stored = await self._make_request(
        "PUT", target, json=collection_data, params=version_query
    )
    return stored
959
+
960
async def delete_collection(self, collection_name: str) -> Dict:
    """
    Delete the collection with the given unique name.

    Args:
        collection_name: Unique name of the collection to delete; substituted
            into the ``delete`` endpoint as ``collectionName``.

    Returns:
        The result of ``_make_request`` for the DELETE (presumably empty on
        success -- confirm against ``_make_request``).

    Raises:
        Whatever ``_make_request`` raises; nothing is caught here.

    Example:
        ```python
        await client.delete_collection("finance-data")
        ```
    """
    url_template = ENDPOINTS["collections"]["delete"]
    target = format_endpoint(url_template, collectionName=collection_name)
    version_query = get_api_version_params("collections")
    outcome = await self._make_request("DELETE", target, params=version_query)
    return outcome
991
+
992
async def get_collection_path(self, collection_name: str) -> Dict:
    """
    Fetch the ancestor chain of a collection.

    Args:
        collection_name: Unique name of the collection; substituted into the
            ``get_collection_path`` endpoint as ``collectionName``.

    Returns:
        The result of ``_make_request`` for the GET. The payload is expected
        to contain:
            - parentFriendlyNameChain (list): Friendly names from root to parent
            - parentNameChain (list): Collection names from root to parent

    Raises:
        Whatever ``_make_request`` raises; nothing is caught here.

    Example:
        ```python
        path = await client.get_collection_path("finance-reports")
        print(" > ".join(path['parentFriendlyNameChain']))
        ```
    """
    url_template = ENDPOINTS["collections"]["get_collection_path"]
    target = format_endpoint(url_template, collectionName=collection_name)
    version_query = get_api_version_params("collections")
    path_info = await self._make_request("GET", target, params=version_query)
    return path_info
1026
+
1027
async def get_child_collection_names(self, collection_name: str) -> List[str]:
    """
    Fetch the names of the collections directly below a parent collection.

    Args:
        collection_name: Unique name of the parent collection; substituted
            into the ``get_child_collection_names`` endpoint as
            ``collectionName``.

    Returns:
        The result of ``_make_request`` for the GET, returned unchanged.
        NOTE(review): annotated as ``List[str]``; no unwrapping is done here,
        so the actual shape is whatever ``_make_request`` yields -- confirm.

    Raises:
        Whatever ``_make_request`` raises; nothing is caught here.

    Example:
        ```python
        children = await client.get_child_collection_names("finance")
        ```
    """
    url_template = ENDPOINTS["collections"]["get_child_collection_names"]
    target = format_endpoint(url_template, collectionName=collection_name)
    version_query = get_api_version_params("collections")
    children = await self._make_request("GET", target, params=version_query)
    return children
1060
+
1061
+ # Lineage Operations
1062
async def get_lineage(self, guid: str, direction: str = "BOTH", depth: int = 3) -> Dict:
    """
    Retrieve the lineage graph around an entity.

    Issues a GET to ``<lineage endpoint>/<guid>`` with ``direction`` and
    ``depth`` as query parameters. Unlike the collection methods, no
    API-version parameters are added here.

    Args:
        guid: GUID of the entity whose lineage is requested; appended to the
            lineage endpoint as the last path segment.
        direction: Which side of the graph to retrieve:
            - "INPUT": upstream sources
            - "OUTPUT": downstream consumers
            - "BOTH": both sides (default)
            The value is forwarded without validation.
        depth: Number of hops to traverse (default 3). The value is
            forwarded as-is; no range check is performed in this method.

    Returns:
        The result of ``_make_request`` for the GET. The payload is expected
        to carry keys such as ``baseEntityGuid``, ``guidEntityMap`` and
        ``relations``.

    Raises:
        Whatever ``_make_request`` raises; nothing is caught here.

    Example:
        lineage = await client.get_lineage(
            guid="table-guid-abc",
            direction="BOTH",
            depth=5
        )
    """
    query = {"direction": direction, "depth": depth}
    lineage_base = ENDPOINTS["lineage"]["lineage"]
    target = "{}/{}".format(lineage_base, guid)
    graph = await self._make_request("GET", target, params=query)
    return graph
1109
+
1110
async def create_lineage(self, lineage_data: Dict) -> Dict:
    """
    Record a lineage relationship by posting a process entity.

    POSTs ``lineage_data`` as the JSON body to the lineage endpoint. No
    query parameters are added and the payload is not validated here.

    Args:
        lineage_data: Process entity describing the data flow, typically:
            - typeName (str): Process type (e.g. "Process")
            - attributes (dict):
                - name (str): Process name
                - qualifiedName (str): Unique identifier
                - inputs (list): Input entity references
                - outputs (list): Output entity references

    Returns:
        The result of ``_make_request`` for the POST (expected to describe
        the created process entity).

    Raises:
        Whatever ``_make_request`` raises; nothing is caught here.

    Example:
        lineage = await client.create_lineage({
            "typeName": "Process",
            "attributes": {
                "name": "Daily Sales ETL",
                "qualifiedName": "etl_sales_daily@tenant",
                "inputs": [{"guid": "source-table-guid"}],
                "outputs": [{"guid": "target-table-guid"}]
            }
        })
    """
    target = ENDPOINTS["lineage"]["lineage"]
    created = await self._make_request("POST", target, json=lineage_data)
    return created
1146
+
1147
+ # === CSV IMPORT/EXPORT OPERATIONS ===
1148
+
1149
async def import_collections_from_csv(self, csv_file_path: str, progress_callback=None) -> Dict:
    """
    Create one collection per row of a CSV file.

    The CSV must contain the columns ``collectionName`` and ``friendlyName``;
    ``description`` and ``parentCollection`` are optional. Rows are processed
    in file order and each row is sent through ``create_collection``. A
    failing row is recorded and does not stop the import.

    Blank cells fall back to defaults instead of being forwarded as NaN:
    ``friendlyName`` defaults to the collection name, ``description`` to ""
    and ``parentCollection`` to "root". A row with a blank
    ``collectionName`` is reported as an error without calling the service.

    Args:
        csv_file_path: Path of the CSV file to read.
        progress_callback: Optional callable invoked as
            ``progress_callback(rows_done, total_rows)`` after every row,
            whether it succeeded or failed. Exceptions raised by the
            callback propagate to the caller.

    Returns:
        Dict with ``total_processed``, ``successful``, ``failed`` and
        ``details`` (one entry per row holding ``row``, ``collectionName``,
        ``status`` and either ``result`` or ``error``).

    Raises:
        ValueError: If the file does not exist, cannot be parsed, or lacks a
            required column.
    """
    import pandas as pd

    if not os.path.exists(csv_file_path):
        raise ValueError(f"CSV file not found: {csv_file_path}")

    try:
        # dtype=str keeps identifiers such as "007" verbatim instead of
        # letting pandas coerce them to numbers.
        df = pd.read_csv(csv_file_path, dtype=str)
    except Exception as e:
        raise ValueError(f"Failed to read CSV file: {str(e)}") from e

    # Validate required columns
    required_columns = ["collectionName", "friendlyName"]
    missing_columns = [col for col in required_columns if col not in df.columns]
    if missing_columns:
        raise ValueError(
            f"Missing required columns: {missing_columns}. Required: {required_columns}"
        )

    def _cell(row, column, default):
        """Return the stripped text of a cell, or *default* if absent, NaN or blank."""
        value = row.get(column)
        if value is None or pd.isna(value):
            return default
        text = str(value).strip()
        return text if text else default

    results = []
    total_rows = len(df)

    for position, (_, row) in enumerate(df.iterrows(), start=1):
        collection_name = _cell(row, "collectionName", "")
        try:
            if not collection_name:
                raise ValueError("collectionName is empty")

            collection_data = {
                "friendlyName": _cell(row, "friendlyName", collection_name),
                "description": _cell(row, "description", ""),
                "parentCollection": {
                    "referenceName": _cell(row, "parentCollection", "root")
                },
            }

            # Create the collection
            result = await self.create_collection(collection_name, collection_data)
        except Exception as e:
            # Best-effort import: record the failure and continue with the next row.
            results.append(
                {
                    "row": position,
                    "collectionName": collection_name or "unknown",
                    "status": "error",
                    "error": str(e),
                }
            )
        else:
            results.append(
                {
                    "row": position,
                    "collectionName": collection_name,
                    "status": "success",
                    "result": result,
                }
            )

        # Report every processed row (not only successes) so that progress
        # actually reaches total_rows; kept outside the try so a callback
        # error is never misreported as a row failure.
        if progress_callback:
            progress_callback(position, total_rows)

    return {
        "total_processed": len(results),
        "successful": sum(1 for r in results if r["status"] == "success"),
        "failed": sum(1 for r in results if r["status"] == "error"),
        "details": results,
    }
1211
+
1212
async def export_collections_to_csv(
    self, csv_file_path: str, include_hierarchy: bool = True, include_metadata: bool = True
) -> str:
    """
    Write all collections of the account to a CSV file.

    Collections are read through ``list_collections``. Both an envelope dict
    carrying the collections under ``"value"`` and a bare list of collection
    dicts are accepted. One CSV row is written per collection with the
    columns ``collectionName``, ``friendlyName``, ``description`` and
    ``parentCollection`` ("root" when no parent reference is present).

    Args:
        csv_file_path: Destination path of the CSV file.
        include_hierarchy: When true, add ``collectionPath`` (friendly names
            joined with " > ") and ``level`` (0 for a top-level collection).
            The path is looked up per collection via ``get_collection_path``;
            the lookup is best-effort and a failure leaves "" / 0.
        include_metadata: When true, add ``systemData_createdAt``,
            ``systemData_lastModifiedAt`` and ``systemData_createdBy``.

    Returns:
        A human-readable status message: either the success message with the
        number of exported collections, or "No collections found to export".

    Raises:
        Exception: If listing the collections or writing the file fails; the
            original error is attached as ``__cause__``.
    """
    import pandas as pd

    try:
        # Get all collections
        collections_data = await self.list_collections()

        if isinstance(collections_data, dict):
            if "value" not in collections_data:
                return "No collections found to export"
            collections = collections_data["value"] or []
        elif collections_data:
            # list_collections is annotated as returning a plain list.
            collections = list(collections_data)
        else:
            return "No collections found to export"

        export_data = []

        for collection in collections:
            # "or {}" guards against an explicit null in the payload.
            parent_ref = collection.get("parentCollection") or {}
            row_data = {
                "collectionName": collection.get("name", ""),
                "friendlyName": collection.get("friendlyName", ""),
                "description": collection.get("description", ""),
                "parentCollection": parent_ref.get("referenceName", "root"),
            }

            if include_hierarchy:
                path = []
                if collection.get("name"):
                    try:
                        path_data = await self.get_collection_path(collection["name"])
                        if "path" in path_data:
                            path = list(path_data["path"] or [])
                        elif "parentFriendlyNameChain" in path_data:
                            # The chain documented by get_collection_path runs
                            # from root to the parent, so append the collection
                            # itself to obtain the full path.
                            own_label = row_data["friendlyName"] or row_data["collectionName"]
                            path = [*(path_data["parentFriendlyNameChain"] or []), own_label]
                    except Exception:
                        # Hierarchy is best-effort: keep the row with an empty
                        # path. Exception (not a bare except) lets task
                        # cancellation propagate.
                        path = []
                row_data["collectionPath"] = " > ".join(str(part) for part in path)
                row_data["level"] = max(len(path) - 1, 0)

            if include_metadata:
                system_data = collection.get("systemData") or {}
                row_data["systemData_createdAt"] = system_data.get("createdAt", "")
                row_data["systemData_lastModifiedAt"] = system_data.get("lastModifiedAt", "")
                row_data["systemData_createdBy"] = system_data.get("createdBy", "")

            export_data.append(row_data)

        # Create DataFrame and export to CSV
        df = pd.DataFrame(export_data)
        df.to_csv(csv_file_path, index=False)

        return f"Successfully exported {len(export_data)} collections to {csv_file_path}"

    except Exception as e:
        raise Exception(f"Failed to export collections to CSV: {str(e)}") from e
1270
+
1271
+
1272
class BatchOperationProgress:
    """Console progress reporter for batch operations.

    Holds the running counters and, on every :meth:`update`, rewrites a
    single status line on stdout (the line starts with a carriage return).
    """

    def __init__(self, total: int, description: str = "Processing"):
        """Remember the expected item count and label, and start the clock."""
        # Reference point for the "Elapsed" figure shown by update().
        self.start_time = datetime.now()
        self.description = description
        self.processed = 0
        self.total = total

    def update(self, processed: int, total: int):
        """Store the new counters and print the refreshed status line.

        The signature matches the ``progress_callback(done, total)`` calls
        made by the CSV import, so a bound ``update`` can be passed directly.
        A line break is printed once ``processed`` reaches ``total``.
        """
        self.processed, self.total = processed, total

        # Avoid dividing by zero when there is nothing to process.
        if total > 0:
            percentage = (processed / total) * 100
        else:
            percentage = 0
        elapsed = datetime.now() - self.start_time

        status_line = (
            f"\r{self.description}: {processed}/{total} ({percentage:.1f}%) - Elapsed: {elapsed}"
        )
        print(status_line, end="", flush=True)

        if total <= processed:
            print()  # New line when complete