pvw-cli 1.2.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pvw-cli might be problematic. Click here for more details.

Files changed (60) hide show
  1. purviewcli/__init__.py +27 -0
  2. purviewcli/__main__.py +15 -0
  3. purviewcli/cli/__init__.py +5 -0
  4. purviewcli/cli/account.py +199 -0
  5. purviewcli/cli/cli.py +170 -0
  6. purviewcli/cli/collections.py +502 -0
  7. purviewcli/cli/domain.py +361 -0
  8. purviewcli/cli/entity.py +2436 -0
  9. purviewcli/cli/glossary.py +533 -0
  10. purviewcli/cli/health.py +250 -0
  11. purviewcli/cli/insight.py +113 -0
  12. purviewcli/cli/lineage.py +1103 -0
  13. purviewcli/cli/management.py +141 -0
  14. purviewcli/cli/policystore.py +103 -0
  15. purviewcli/cli/relationship.py +75 -0
  16. purviewcli/cli/scan.py +357 -0
  17. purviewcli/cli/search.py +527 -0
  18. purviewcli/cli/share.py +478 -0
  19. purviewcli/cli/types.py +831 -0
  20. purviewcli/cli/unified_catalog.py +3540 -0
  21. purviewcli/cli/workflow.py +402 -0
  22. purviewcli/client/__init__.py +21 -0
  23. purviewcli/client/_account.py +1877 -0
  24. purviewcli/client/_collections.py +1761 -0
  25. purviewcli/client/_domain.py +414 -0
  26. purviewcli/client/_entity.py +3545 -0
  27. purviewcli/client/_glossary.py +3233 -0
  28. purviewcli/client/_health.py +501 -0
  29. purviewcli/client/_insight.py +2873 -0
  30. purviewcli/client/_lineage.py +2138 -0
  31. purviewcli/client/_management.py +2202 -0
  32. purviewcli/client/_policystore.py +2915 -0
  33. purviewcli/client/_relationship.py +1351 -0
  34. purviewcli/client/_scan.py +2607 -0
  35. purviewcli/client/_search.py +1472 -0
  36. purviewcli/client/_share.py +272 -0
  37. purviewcli/client/_types.py +2708 -0
  38. purviewcli/client/_unified_catalog.py +5112 -0
  39. purviewcli/client/_workflow.py +2734 -0
  40. purviewcli/client/api_client.py +1295 -0
  41. purviewcli/client/business_rules.py +675 -0
  42. purviewcli/client/config.py +231 -0
  43. purviewcli/client/data_quality.py +433 -0
  44. purviewcli/client/endpoint.py +123 -0
  45. purviewcli/client/endpoints.py +554 -0
  46. purviewcli/client/exceptions.py +38 -0
  47. purviewcli/client/lineage_visualization.py +797 -0
  48. purviewcli/client/monitoring_dashboard.py +712 -0
  49. purviewcli/client/rate_limiter.py +30 -0
  50. purviewcli/client/retry_handler.py +125 -0
  51. purviewcli/client/scanning_operations.py +523 -0
  52. purviewcli/client/settings.py +1 -0
  53. purviewcli/client/sync_client.py +250 -0
  54. purviewcli/plugins/__init__.py +1 -0
  55. purviewcli/plugins/plugin_system.py +709 -0
  56. pvw_cli-1.2.8.dist-info/METADATA +1618 -0
  57. pvw_cli-1.2.8.dist-info/RECORD +60 -0
  58. pvw_cli-1.2.8.dist-info/WHEEL +5 -0
  59. pvw_cli-1.2.8.dist-info/entry_points.txt +3 -0
  60. pvw_cli-1.2.8.dist-info/top_level.txt +1 -0
@@ -0,0 +1,30 @@
1
+ import threading
2
+ import time
3
+
4
class RateLimiter:
    """
    Simple thread-safe rate limiter using the token bucket algorithm.

    rate_limit_config example: { 'rate': 5, 'per': 1 } # 5 requests per 1 second
    """
    def __init__(self, config=None):
        """
        Initialize the limiter.

        Args:
            config: Optional dict with keys 'rate' (tokens per window,
                default 10) and 'per' (window length in seconds, default 1).
        """
        config = config or {}
        self.rate = config.get('rate', 10)  # default: 10 requests
        self.per = config.get('per', 1)  # default: per 1 second
        self.allowance = self.rate  # current token balance; bucket starts full
        self.last_check = time.monotonic()
        self.lock = threading.Lock()

    def wait(self):
        """Block until a token is available, then consume it (thread-safe)."""
        with self.lock:
            current = time.monotonic()
            time_passed = current - self.last_check
            self.last_check = current
            # Refill proportionally to elapsed time, capped at burst size.
            self.allowance += time_passed * (self.rate / self.per)
            if self.allowance > self.rate:
                self.allowance = self.rate
            if self.allowance < 1.0:
                # Sleep exactly long enough for the balance to reach one token.
                sleep_time = (1.0 - self.allowance) * (self.per / self.rate)
                time.sleep(sleep_time)
                # BUG FIX: advance last_check past the sleep so the slept
                # interval is not counted again as refill on the next call
                # (the original left last_check at the pre-sleep timestamp,
                # double-counting the refill and over-granting tokens).
                self.last_check = time.monotonic()
                self.allowance = 0
            else:
                self.allowance -= 1.0
@@ -0,0 +1,125 @@
1
+ """
2
+ Retry handler for Purview API operations
3
+ """
4
+
5
+ import time
6
+ import random
7
+ import logging
8
+ from typing import Callable, Any, Dict, Optional
9
+ from .exceptions import PurviewAPIError, PurviewRateLimitError
10
+
11
class RetryHandler:
    """Handles retry logic for API operations with exponential backoff"""

    def __init__(self, config: Optional[Dict] = None):
        """
        Initialize retry handler

        Args:
            config: Retry configuration dictionary; keys override the
                defaults (max_retries, base_delay, max_delay,
                exponential_base, jitter, retry_on_status_codes,
                retry_on_exceptions).
        """
        default_config = {
            'max_retries': 3,
            'base_delay': 1.0,
            'max_delay': 60.0,
            'exponential_base': 2,
            'jitter': True,
            'retry_on_status_codes': [429, 500, 502, 503, 504],
            'retry_on_exceptions': [ConnectionError, TimeoutError]
        }

        self.config = {**default_config, **(config or {})}
        self.logger = logging.getLogger(__name__)

    def execute(self, operation: Callable, *args, **kwargs) -> Any:
        """
        Execute operation with retry logic

        Args:
            operation: Function to execute
            *args: Positional arguments for operation
            **kwargs: Keyword arguments for operation

        Returns:
            Result of operation

        Raises:
            Exception: If all retries exhausted
        """
        last_exception = None

        for attempt in range(self.config['max_retries'] + 1):
            try:
                return operation(*args, **kwargs)

            except Exception as e:
                last_exception = e

                if not self._should_retry(e, attempt):
                    raise e

                if attempt < self.config['max_retries']:
                    delay = self._calculate_delay(attempt)
                    self.logger.warning(
                        f"Operation failed (attempt {attempt + 1}), retrying in {delay:.2f}s: {e}"
                    )
                    time.sleep(delay)
                else:
                    self.logger.error(f"Operation failed after {attempt + 1} attempts: {e}")
                    raise e

        # This should never be reached, but just in case
        raise last_exception

    def _should_retry(self, exception: Exception, attempt: int) -> bool:
        """
        Determine if operation should be retried

        Args:
            exception: Exception that occurred
            attempt: Current attempt number

        Returns:
            True if should retry, False otherwise
        """
        if attempt >= self.config['max_retries']:
            return False

        # BUG FIX: use isinstance so subclasses of the configured exception
        # types are also retried — the original `type(e) in [...]` missed e.g.
        # ConnectionResetError (subclass of ConnectionError).
        if isinstance(exception, tuple(self.config['retry_on_exceptions'])):
            return True

        # Check for API errors with specific status codes
        if isinstance(exception, PurviewAPIError):
            if hasattr(exception, 'status_code'):
                return exception.status_code in self.config['retry_on_status_codes']

        # Check for rate limit errors
        if isinstance(exception, PurviewRateLimitError):
            return True

        return False

    def _calculate_delay(self, attempt: int) -> float:
        """
        Calculate delay for retry attempt using exponential backoff

        Args:
            attempt: Current attempt number

        Returns:
            Delay in seconds
        """
        delay = self.config['base_delay'] * (
            self.config['exponential_base'] ** attempt
        )

        # Apply maximum delay limit
        delay = min(delay, self.config['max_delay'])

        # Add jitter to prevent thundering herd (up to +10% of the delay)
        if self.config['jitter']:
            jitter = random.uniform(0, 0.1) * delay
            delay += jitter

        return delay
@@ -0,0 +1,523 @@
1
+ """
2
+ Scanning Operations Module for Microsoft Purview
3
+ Provides comprehensive scanning automation and management capabilities
4
+ """
5
+
6
+ import asyncio
7
+ import json
8
+ from datetime import datetime, timedelta
9
+ from pathlib import Path
10
+ from typing import Dict, List, Optional, Any, Callable
11
+ from rich.console import Console
12
+ from rich.table import Table
13
+ from rich.progress import Progress, TaskID
14
+
15
# Optional pandas dependency for report generation
try:
    import pandas as pd
    PANDAS_AVAILABLE = True
except ImportError:
    # Degrade gracefully when pandas is absent; consumers should test
    # PANDAS_AVAILABLE before touching `pd`.
    # NOTE(review): generate_scan_report below calls pd.DataFrame
    # unconditionally — verify this flag is actually honored.
    pd = None
    PANDAS_AVAILABLE = False
    print("Warning: pandas not available. Report generation features will be limited.")

from .api_client import PurviewClient

# Module-level console for status output (ScanningManager also keeps its own).
console = Console()
27
+
28
class ScanningManager:
    """Advanced scanning operations and automation for Purview data sources."""

    def __init__(self, client: "PurviewClient"):
        # Async Purview API client; all requests are delegated to it.
        self.client = client
        self.console = Console()

    async def create_data_source(self, data_source_config: Dict) -> Dict:
        """Create a new data source"""
        endpoint = "/scan/datasources"
        return await self.client._make_request('PUT', endpoint, json=data_source_config)

    async def get_data_sources(self) -> List[Dict]:
        """Get all data sources"""
        endpoint = "/scan/datasources"
        response = await self.client._make_request('GET', endpoint)
        return response.get('value', [])

    async def create_scan(self, data_source_name: str, scan_config: Dict) -> Dict:
        """Create a new scan for a data source; scan_config must carry 'name'."""
        endpoint = f"/scan/datasources/{data_source_name}/scans/{scan_config['name']}"
        return await self.client._make_request('PUT', endpoint, json=scan_config)

    async def run_scan(self, data_source_name: str, scan_name: str) -> Dict:
        """Start a scan"""
        endpoint = f"/scan/datasources/{data_source_name}/scans/{scan_name}/run"
        return await self.client._make_request('POST', endpoint)

    async def get_scan_status(self, data_source_name: str, scan_name: str, run_id: str) -> Dict:
        """Get scan status for a specific run"""
        endpoint = f"/scan/datasources/{data_source_name}/scans/{scan_name}/runs/{run_id}"
        return await self.client._make_request('GET', endpoint)

    async def get_scan_history(self, data_source_name: str, scan_name: str) -> List[Dict]:
        """Get scan run history"""
        endpoint = f"/scan/datasources/{data_source_name}/scans/{scan_name}/runs"
        response = await self.client._make_request('GET', endpoint)
        return response.get('value', [])

    async def bulk_create_data_sources(self, sources_config: List[Dict],
                                       progress_callback: Optional[Callable] = None) -> Dict:
        """
        Create multiple data sources from configuration.

        Returns a dict with 'created', 'failed' and 'errors' lists; a failure
        on one source does not abort the remaining ones.
        """
        results = {'created': [], 'failed': [], 'errors': []}

        with Progress() as progress:
            task = progress.add_task("Creating data sources...", total=len(sources_config))

            for i, source_config in enumerate(sources_config):
                try:
                    result = await self.create_data_source(source_config)
                    results['created'].append({
                        'name': source_config.get('name'),
                        'type': source_config.get('kind'),
                        'result': result
                    })

                except Exception as e:
                    error_msg = f"Failed to create {source_config.get('name', 'unknown')}: {str(e)}"
                    results['failed'].append(source_config.get('name', 'unknown'))
                    results['errors'].append(error_msg)

                progress.update(task, advance=1)
                if progress_callback:
                    progress_callback(i + 1, len(sources_config))

        return results

    async def bulk_run_scans(self, scan_configs: List[Dict],
                             monitor_progress: bool = True) -> Dict:
        """
        Run multiple scans (each config needs 'data_source' and 'scan_name')
        and optionally monitor their progress until completion.
        """
        results = {'started': [], 'failed': [], 'completed': [], 'errors': []}

        # Start all scans first, then monitor them as a batch.
        scan_runs = []
        for scan_config in scan_configs:
            try:
                data_source = scan_config['data_source']
                scan_name = scan_config['scan_name']

                result = await self.run_scan(data_source, scan_name)
                run_id = result.get('runId')

                if run_id:
                    scan_runs.append({
                        'data_source': data_source,
                        'scan_name': scan_name,
                        'run_id': run_id,
                        'started_at': datetime.now()
                    })
                    results['started'].append(f"{data_source}/{scan_name}")

            except Exception as e:
                error_msg = f"Failed to start scan {scan_config}: {str(e)}"
                results['failed'].append(str(scan_config))
                results['errors'].append(error_msg)

        # Monitor progress if requested
        if monitor_progress and scan_runs:
            await self._monitor_scan_progress(scan_runs, results)

        return results

    async def _monitor_scan_progress(self, scan_runs: List[Dict], results: Dict):
        """Poll running scans every 30s, updating progress bars and results."""
        pending_scans = scan_runs.copy()

        with Progress() as progress:
            # One progress bar per scan, keyed by "data_source/scan_name".
            scan_tasks = {}
            for scan in pending_scans:
                scan_id = f"{scan['data_source']}/{scan['scan_name']}"
                task_id = progress.add_task(f"Scanning {scan_id}", total=100)
                scan_tasks[scan_id] = task_id

            while pending_scans:
                completed_scans = []

                for scan in pending_scans:
                    try:
                        status = await self.get_scan_status(
                            scan['data_source'],
                            scan['scan_name'],
                            scan['run_id']
                        )

                        scan_state = status.get('status', 'Unknown')
                        scan_id = f"{scan['data_source']}/{scan['scan_name']}"

                        if scan_state in ['Succeeded', 'Failed', 'Canceled']:
                            completed_scans.append(scan)
                            progress.update(scan_tasks[scan_id], completed=100)

                            if scan_state == 'Succeeded':
                                results['completed'].append(scan_id)
                            else:
                                results['failed'].append(scan_id)
                                results['errors'].append(f"Scan {scan_id} {scan_state}")

                        elif scan_state == 'Running':
                            # Update progress based on scan metrics if available;
                            # cap at 99 so only a terminal state shows 100%.
                            scan_result = status.get('scanResultMetrics', {})
                            if scan_result:
                                processed = scan_result.get('processedCount', 0)
                                total = scan_result.get('totalCount', 1)
                                percentage = min((processed / total) * 100, 99) if total > 0 else 50
                                progress.update(scan_tasks[scan_id], completed=percentage)

                    except Exception as e:
                        console.print(f"[red]Error monitoring scan {scan}: {str(e)}[/red]")

                # Remove completed scans
                for completed in completed_scans:
                    pending_scans.remove(completed)

                if pending_scans:
                    await asyncio.sleep(30)  # Check every 30 seconds

    async def generate_scan_report(self, output_file: str, days_back: int = 30) -> Dict:
        """
        Generate a CSV scanning report covering the last `days_back` days.

        Returns a summary dict with the report path, period, statistics and
        the number of scan runs found.
        """
        from datetime import timezone

        # BUG FIX: use a timezone-aware "now" — run timestamps are parsed as
        # UTC-aware below, and comparing naive to aware datetimes raises
        # TypeError.
        end_date = datetime.now(timezone.utc)
        start_date = end_date - timedelta(days=days_back)

        console.print(f"[blue]Generating scan report for last {days_back} days...[/blue]")

        # Get all data sources
        data_sources = await self.get_data_sources()

        report_data = []
        summary_stats = {
            'total_sources': len(data_sources),
            'scanned_sources': 0,
            'successful_scans': 0,
            'failed_scans': 0,
            'total_assets_discovered': 0
        }

        for source in data_sources:
            source_name = source.get('name')
            source_type = source.get('kind')

            try:
                # Get scans for this data source
                scans_endpoint = f"/scan/datasources/{source_name}/scans"
                scans_response = await self.client._make_request('GET', scans_endpoint)
                scans = scans_response.get('value', [])

                for scan in scans:
                    scan_name = scan.get('name')

                    # Get scan history
                    history = await self.get_scan_history(source_name, scan_name)

                    for run in history:
                        start_str = run.get('startTime')
                        if not start_str:
                            # BUG FIX: skip runs without a start time instead
                            # of crashing in fromisoformat('').
                            continue
                        run_date = datetime.fromisoformat(start_str.replace('Z', '+00:00'))

                        if start_date <= run_date <= end_date:
                            # NOTE(review): this counts scan *runs*, not
                            # distinct sources, despite the key name — confirm
                            # intent before renaming.
                            summary_stats['scanned_sources'] += 1

                            status = run.get('status', 'Unknown')
                            if status == 'Succeeded':
                                summary_stats['successful_scans'] += 1
                            elif status == 'Failed':
                                summary_stats['failed_scans'] += 1

                            # Extract metrics
                            metrics = run.get('scanResultMetrics', {})
                            assets_discovered = metrics.get('processedCount', 0)
                            summary_stats['total_assets_discovered'] += assets_discovered

                            report_data.append({
                                'data_source': source_name,
                                'source_type': source_type,
                                'scan_name': scan_name,
                                'run_id': run.get('runId'),
                                'status': status,
                                'start_time': run.get('startTime'),
                                'end_time': run.get('endTime'),
                                'duration_minutes': self._calculate_duration(
                                    run.get('startTime'), run.get('endTime')
                                ),
                                'assets_discovered': assets_discovered,
                                'assets_classified': metrics.get('classifiedCount', 0),
                                'error_message': run.get('error', {}).get('message', '')
                            })

            except Exception as e:
                console.print(f"[yellow]Warning: Could not get scan data for {source_name}: {e}[/yellow]")

        # Save report to CSV.
        # BUG FIX: the module imports pandas optionally (PANDAS_AVAILABLE),
        # but the original called pd.DataFrame unconditionally and crashed
        # when pandas was missing — fall back to the stdlib csv module.
        if PANDAS_AVAILABLE:
            df = pd.DataFrame(report_data)
            df.to_csv(output_file, index=False)
        else:
            import csv
            fieldnames = list(report_data[0].keys()) if report_data else []
            with open(output_file, 'w', newline='') as f:
                writer = csv.DictWriter(f, fieldnames=fieldnames)
                writer.writeheader()
                writer.writerows(report_data)

        # Generate summary
        summary = {
            'report_file': output_file,
            'report_period': f"{start_date.date()} to {end_date.date()}",
            'statistics': summary_stats,
            'total_scan_runs': len(report_data)
        }

        console.print(f"[green]✓ Scan report saved to {output_file}[/green]")
        console.print(f"[green]✓ Found {len(report_data)} scan runs across {summary_stats['total_sources']} data sources[/green]")

        return summary

    def _calculate_duration(self, start_time: str, end_time: str) -> float:
        """Calculate scan duration in minutes; 0.0 for missing/invalid times."""
        try:
            if not start_time or not end_time:
                return 0.0

            start = datetime.fromisoformat(start_time.replace('Z', '+00:00'))
            end = datetime.fromisoformat(end_time.replace('Z', '+00:00'))

            duration = end - start
            return duration.total_seconds() / 60

        except Exception:
            # Malformed timestamps are reported as zero duration rather than
            # aborting report generation.
            return 0.0

    async def optimize_scan_schedules(self) -> Dict:
        """Analyze scan patterns and suggest optimizations"""
        console.print("[blue]Analyzing scan patterns for optimization recommendations...[/blue]")

        # Get all data sources and their scan history
        data_sources = await self.get_data_sources()
        optimization_report = {
            'recommendations': [],
            'statistics': {},
            'potential_savings': {}
        }

        for source in data_sources:
            source_name = source.get('name')

            try:
                # Analyze scan frequency and success rates
                scans_endpoint = f"/scan/datasources/{source_name}/scans"
                scans_response = await self.client._make_request('GET', scans_endpoint)
                scans = scans_response.get('value', [])

                for scan in scans:
                    scan_name = scan.get('name')
                    history = await self.get_scan_history(source_name, scan_name)

                    if len(history) >= 5:  # Need some history for analysis
                        analysis = self._analyze_scan_pattern(history)

                        if analysis['recommendations']:
                            optimization_report['recommendations'].extend([
                                {
                                    'data_source': source_name,
                                    'scan_name': scan_name,
                                    'recommendation': rec
                                }
                                for rec in analysis['recommendations']
                            ])

            except Exception as e:
                console.print(f"[yellow]Warning: Could not analyze {source_name}: {e}[/yellow]")

        return optimization_report

    def _analyze_scan_pattern(self, scan_history: List[Dict]) -> Dict:
        """Analyze scan history to identify optimization opportunities."""
        recommendations = []

        # Calculate success rate
        total_scans = len(scan_history)
        successful_scans = sum(1 for run in scan_history if run.get('status') == 'Succeeded')
        success_rate = successful_scans / total_scans if total_scans > 0 else 0

        # Analyze scan frequency (only runs with a start time contribute)
        scan_times = [
            datetime.fromisoformat(run.get('startTime', '').replace('Z', '+00:00'))
            for run in scan_history
            if run.get('startTime')
        ]

        # BUG FIX: avg_interval was only bound when >= 2 timestamps existed,
        # making the frequency checks a NameError otherwise; use None to mean
        # "not enough data" and skip the interval-based recommendations.
        avg_interval = None
        if len(scan_times) >= 2:
            scan_times.sort()
            intervals = [
                (scan_times[i] - scan_times[i - 1]).total_seconds() / 3600  # Hours
                for i in range(1, len(scan_times))
            ]
            avg_interval = sum(intervals) / len(intervals) if intervals else None

        # Generate recommendations
        if success_rate < 0.8:
            recommendations.append(f"Low success rate ({success_rate:.1%}). Review scan configuration and data source connectivity.")

        if avg_interval is not None:
            if avg_interval < 6:  # Less than 6 hours between scans
                recommendations.append(f"Very frequent scanning (avg {avg_interval:.1f}h intervals). Consider reducing frequency if data doesn't change often.")

            if avg_interval > 168:  # More than a week between scans
                recommendations.append(f"Infrequent scanning (avg {avg_interval:.1f}h intervals). Consider more frequent scans for better data freshness.")

        return {'recommendations': recommendations}
366
+
367
class ScanTemplateManager:
    """Manage scanning templates and configurations"""

    def __init__(self):
        # Registry of named templates; starts with built-in defaults and can
        # be extended via save_template().
        self.templates = self._load_default_templates()

    def _load_default_templates(self) -> Dict:
        """Load default scanning templates"""
        return {
            'azure_storage': {
                'kind': 'AdlsGen2',
                'properties': {
                    'subscriptionId': '',
                    'resourceGroup': '',
                    'location': '',
                    'endpoint': '',
                    'collection': {
                        'referenceName': 'default'
                    }
                }
            },
            'sql_database': {
                'kind': 'AzureSqlDatabase',
                'properties': {
                    'serverEndpoint': '',
                    'databaseName': '',
                    'collection': {
                        'referenceName': 'default'
                    }
                }
            },
            'synapse_workspace': {
                'kind': 'AzureSynapseWorkspace',
                'properties': {
                    'dedicatedSqlEndpoint': '',
                    'serverlessSqlEndpoint': '',
                    'collection': {
                        'referenceName': 'default'
                    }
                }
            }
        }

    def create_data_source_config(self, template_name: str, **kwargs) -> Dict:
        """
        Create data source configuration from template.

        Dotted keys (e.g. 'properties.endpoint') address nested dicts,
        creating intermediate levels as needed.

        Raises:
            ValueError: If template_name is not a known template.
        """
        from copy import deepcopy

        if template_name not in self.templates:
            raise ValueError(f"Unknown template: {template_name}")

        # BUG FIX: deep-copy the template — the original shallow .copy()
        # shared nested dicts with the stored template, so setting a dotted
        # key like 'properties.endpoint' mutated the template itself and
        # leaked values into every later call.
        config = deepcopy(self.templates[template_name])

        # Update properties with provided values
        for key, value in kwargs.items():
            if '.' in key:
                # Handle nested properties like 'properties.endpoint'
                parts = key.split('.')
                current = config
                for part in parts[:-1]:
                    if part not in current:
                        current[part] = {}
                    current = current[part]
                current[parts[-1]] = value
            else:
                config[key] = value

        return config

    def create_scan_config(self, scan_name: str, scan_ruleset: Optional[str] = None) -> Dict:
        """Create scan configuration; defaults to the system AzureSqlDatabase ruleset."""
        config = {
            'name': scan_name,
            'kind': 'AzureSqlDatabaseCredential',
            'properties': {
                'scanRulesetName': scan_ruleset or 'AzureSqlDatabase',
                'scanRulesetType': 'System',
                'collection': {
                    'referenceName': 'default'
                }
            }
        }

        return config

    def save_template(self, name: str, template: Dict, file_path: str = None):
        """Register a custom template, optionally persisting it as JSON."""
        self.templates[name] = template

        if file_path:
            with open(file_path, 'w') as f:
                json.dump({name: template}, f, indent=2)

    def load_template_from_file(self, file_path: str) -> Dict:
        """Load template from a JSON file (returns the parsed dict as-is)."""
        with open(file_path, 'r') as f:
            return json.load(f)
461
+
462
# CLI Integration Functions
async def create_scanning_cli_commands():
    """Create CLI commands for scanning operations"""
    # This would integrate with the enhanced_cli.py
    # Example implementation for demonstration
    # NOTE(review): `click` and `PurviewConfig` are referenced below but are
    # never imported in this module — calling this function raises NameError.
    # Also, click does not natively invoke `async def` command callbacks;
    # these coroutines would need an asyncio bridge. Confirm whether this
    # demonstration code should be wired up or removed before shipping.

    @click.group()
    def scanning():
        """Advanced scanning operations and automation"""
        pass

    @scanning.command()
    @click.option('--config-file', required=True, help='Data source configuration file')
    @click.option('--profile', default='default', help='Configuration profile')
    async def create_sources(config_file, profile):
        """Create multiple data sources from configuration file"""
        config = PurviewConfig.load_profile(profile)

        with open(config_file, 'r') as f:
            sources_config = json.load(f)

        async with PurviewClient(config) as client:
            manager = ScanningManager(client)
            results = await manager.bulk_create_data_sources(sources_config)

        console.print(f"[green]✓ Created {len(results['created'])} data sources[/green]")
        if results['failed']:
            console.print(f"[red]✗ Failed to create {len(results['failed'])} data sources[/red]")

    @scanning.command()
    @click.option('--output-file', required=True, help='Output file for scan report')
    @click.option('--days', default=30, help='Number of days to include in report')
    @click.option('--profile', default='default', help='Configuration profile')
    async def report(output_file, days, profile):
        """Generate comprehensive scanning report"""
        config = PurviewConfig.load_profile(profile)

        async with PurviewClient(config) as client:
            manager = ScanningManager(client)
            report = await manager.generate_scan_report(output_file, days)

        # Display summary
        stats = report['statistics']
        table = Table(title="Scan Report Summary")
        table.add_column("Metric", style="cyan")
        table.add_column("Value", style="green")

        table.add_row("Total Data Sources", str(stats['total_sources']))
        table.add_row("Successful Scans", str(stats['successful_scans']))
        table.add_row("Failed Scans", str(stats['failed_scans']))
        table.add_row("Assets Discovered", str(stats['total_assets_discovered']))

        console.print(table)

    return scanning
517
+
518
# Export the main classes and functions
# (public API of this module — keep in sync with the definitions above)
__all__ = [
    'ScanningManager',
    'ScanTemplateManager',
    'create_scanning_cli_commands'
]
@@ -0,0 +1 @@
1
# Default Purview account name; None means "not configured".
# NOTE(review): presumably overridden at runtime from environment/CLI
# configuration by consumers of this settings module — confirm.
PURVIEW_ACCOUNT_NAME = None