pvw-cli 1.0.6__py3-none-any.whl → 1.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pvw-cli might be problematic. Click here for more details.

purviewcli/cli/entity.py CHANGED
@@ -1828,5 +1828,469 @@ def list(type_name, limit):
1828
1828
  console.print(f"[red]✗ Error executing entity list: {str(e)}[/red]")
1829
1829
 
1830
1830
 
1831
@entity.command("bulk-delete-optimized")
@click.argument("guids", nargs=-1, required=True)
@click.option("--bulk-size", type=int, default=50,
              help="Assets per bulk delete request (Microsoft recommended: 50)")
@click.option("--max-parallel", type=int, default=10,
              help="Maximum parallel deletion jobs")
@click.option("--throttle-ms", type=int, default=200,
              help="Throttle delay between API calls (milliseconds)")
@click.option("--batch-throttle-ms", type=int, default=800,
              help="Throttle delay between batches (milliseconds)")
@click.option("--dry-run", is_flag=True,
              help="Show what would be deleted without actually deleting")
@click.option("--continuous", is_flag=True,
              help="Continue until all assets in collection are deleted")
@click.option("--collection-name",
              help="Collection name for continuous deletion mode")
@click.pass_context
def bulk_delete_optimized(ctx, guids, bulk_size, max_parallel, throttle_ms,
                          batch_throttle_ms, dry_run, continuous, collection_name):
    """
    Optimized bulk delete with mathematical precision (equivalent to Remove-PurviewAsset-Batch.ps1)

    Features:
    - Mathematical optimization for perfect efficiency
    - Parallel processing with controlled throttling
    - Continuous deletion mode for large collections
    - Reliable counting and progress tracking
    - Microsoft's recommended 50 assets per bulk request
    """
    try:
        from rich.console import Console
        import math

        console = Console()

        # Guard: the ceil-divisions below raise ZeroDivisionError for
        # non-positive tuning values, which the broad except would mask.
        if bulk_size <= 0 or max_parallel <= 0:
            console.print("[red]✗ --bulk-size and --max-parallel must be positive integers[/red]")
            return

        # Mathematical optimization display (guids is required, so this is
        # normally non-empty; the guard keeps direct callers safe).
        if guids:
            total_assets = len(guids)
            assets_per_job = math.ceil(total_assets / max_parallel)
            api_calls_per_job = math.ceil(assets_per_job / bulk_size)
            total_api_calls = api_calls_per_job * max_parallel

            console.print("[blue]⚙️ Mathematical Optimization Analysis:[/blue]")
            console.print(f" 📊 Total Assets: {total_assets}")
            console.print(f" 🔄 Parallel Jobs: {max_parallel}")
            console.print(f" 📦 Assets per Job: {assets_per_job}")
            console.print(f" 🚀 Bulk Size: {bulk_size}")
            console.print(f" 📞 API Calls per Job: {api_calls_per_job}")
            console.print(f" 📈 Total API Calls: {total_api_calls}")

            # Check for perfect division (like PowerShell mathematical optimization)
            if total_assets % (max_parallel * bulk_size) == 0:
                console.print("[green]✨ Perfect mathematical division achieved! Zero waste.[/green]")
            else:
                # Empty slots = capacity of all requests minus actual assets.
                waste_assets = (total_api_calls * bulk_size) - total_assets
                console.print(f"[yellow]⚠ Mathematical waste: {waste_assets} empty slots in final requests[/yellow]")

        if continuous and collection_name:
            deleted_count = _continuous_collection_deletion(
                ctx, collection_name, bulk_size, max_parallel,
                throttle_ms, batch_throttle_ms, dry_run
            )
        else:
            # Previously --continuous without --collection-name was silently
            # ignored; warn so the misconfiguration is visible.
            if continuous and not collection_name:
                console.print("[yellow]⚠ --continuous requires --collection-name; deleting only the provided GUIDs[/yellow]")
            deleted_count = _execute_optimized_bulk_delete(
                ctx, list(guids), bulk_size, max_parallel,
                throttle_ms, batch_throttle_ms, dry_run
            )

        console.print(f"[green]✓ {'Would delete' if dry_run else 'Successfully deleted'} {deleted_count} assets[/green]")

    except Exception as e:
        from rich.console import Console
        console = Console()
        console.print(f"[red]✗ Error in bulk-delete-optimized: {str(e)}[/red]")
1905
+
1906
+
1907
@entity.command("bulk-delete-from-collection")
@click.argument("collection-name")
@click.option("--bulk-size", type=int, default=50,
              help="Assets per bulk delete request (Microsoft recommended: 50)")
@click.option("--max-parallel", type=int, default=10,
              help="Maximum parallel deletion jobs")
@click.option("--batch-size", type=int, default=1000,
              help="Assets to process per batch cycle")
@click.option("--throttle-ms", type=int, default=200,
              help="Throttle delay between API calls (milliseconds)")
@click.option("--batch-throttle-ms", type=int, default=800,
              help="Throttle delay between batch cycles (milliseconds)")
@click.option("--dry-run", is_flag=True,
              help="Show what would be deleted without actually deleting")
@click.confirmation_option(prompt="Are you sure you want to delete all assets in this collection?")
@click.pass_context
def bulk_delete_from_collection(ctx, collection_name, bulk_size, max_parallel,
                                batch_size, throttle_ms, dry_run, batch_throttle_ms=800):
    """
    Delete all assets from a collection using continuous deletion strategy

    Features:
    - Continuous deletion until collection is empty
    - Mathematical optimization for each batch
    - Progress tracking and estimation
    - Handles 500K+ assets efficiently

    The batch throttle was previously hard-coded to 800 ms; it is now
    configurable via --batch-throttle-ms (same default, so behavior is
    unchanged for existing callers).
    """
    try:
        from rich.console import Console

        console = Console()
        console.print(f"[blue]🎯 Starting continuous deletion for collection: {collection_name}[/blue]")

        deleted_count = _continuous_collection_deletion(
            ctx, collection_name, bulk_size, max_parallel,
            throttle_ms, batch_throttle_ms, dry_run, batch_size
        )

        console.print(f"[green]✓ Collection cleanup complete: {'Would delete' if dry_run else 'Deleted'} {deleted_count} total assets[/green]")

    except Exception as e:
        from rich.console import Console
        console = Console()
        console.print(f"[red]✗ Error in bulk-delete-from-collection: {str(e)}[/red]")
1948
+
1949
+
1950
@entity.command("count-assets")
@click.argument("collection-name")
@click.option("--by-type", is_flag=True, help="Group count by asset type")
@click.option("--include-relationships", is_flag=True, help="Include relationship counts")
@click.pass_context
def count_assets(ctx, collection_name, by_type, include_relationships):
    """
    Count assets in a collection with detailed breakdown
    """
    try:
        # NOTE: the previous version also imported rich.table.Table here,
        # but never used it (rendering happens in _display_type_breakdown).
        from rich.console import Console

        console = Console()
        console.print(f"[blue]📊 Counting assets in collection: {collection_name}[/blue]")

        # Get asset count using search API
        total_count = _get_collection_asset_count(collection_name)

        console.print(f"[green]✓ Total assets: {total_count}[/green]")

        if by_type:
            # Per-type breakdown rendered as a rich table.
            type_counts = _get_asset_type_breakdown(collection_name)
            _display_type_breakdown(type_counts)

        if include_relationships:
            rel_count = _get_relationship_count(collection_name)
            console.print(f"[blue]🔗 Total relationships: {rel_count}[/blue]")

    except Exception as e:
        from rich.console import Console
        console = Console()
        console.print(f"[red]✗ Error in count-assets: {str(e)}[/red]")
1984
+
1985
+
1986
@entity.command("analyze-performance")
@click.option("--bulk-size", type=int, default=50, help="Bulk size to analyze")
@click.option("--max-parallel", type=int, default=10, help="Parallel jobs to analyze")
@click.option("--asset-count", type=int, default=1000, help="Total assets for analysis")
@click.pass_context
def analyze_performance(ctx, bulk_size, max_parallel, asset_count):
    """
    Analyze bulk deletion performance with mathematical optimization
    """
    try:
        from rich.console import Console
        from rich.table import Table
        import math

        console = Console()

        # Guard: every division below assumes positive values; previously a
        # non-positive option raised ZeroDivisionError into the broad except.
        if bulk_size <= 0 or max_parallel <= 0 or asset_count <= 0:
            console.print("[red]✗ --bulk-size, --max-parallel and --asset-count must be positive[/red]")
            return

        console.print("[blue]📈 Performance Analysis[/blue]")

        # Mathematical calculations (from PowerShell scripts)
        assets_per_job = math.ceil(asset_count / max_parallel)
        api_calls_per_job = math.ceil(assets_per_job / bulk_size)
        total_api_calls = api_calls_per_job * max_parallel

        # Time estimations (based on PowerShell measurements)
        avg_api_time_ms = 1500  # Average API call time
        throttle_time_ms = 200  # Throttle between calls
        total_time_per_call = avg_api_time_ms + throttle_time_ms

        estimated_time_seconds = (total_api_calls * total_time_per_call) / 1000
        estimated_time_minutes = estimated_time_seconds / 60
        estimated_time_hours = estimated_time_minutes / 60

        # Create performance table
        table = Table(title="Performance Analysis")
        table.add_column("Metric", style="cyan")
        table.add_column("Value", style="green")
        table.add_column("Details", style="yellow")

        table.add_row("Total Assets", f"{asset_count:,}", "Assets to process")
        table.add_row("Parallel Jobs", f"{max_parallel}", "Concurrent deletion jobs")
        table.add_row("Bulk Size", f"{bulk_size}", "Assets per API call")
        table.add_row("Assets per Job", f"{assets_per_job}", f"{asset_count} ÷ {max_parallel}")
        table.add_row("API Calls per Job", f"{api_calls_per_job}", f"{assets_per_job} ÷ {bulk_size}")
        table.add_row("Total API Calls", f"{total_api_calls}", f"{api_calls_per_job} × {max_parallel}")
        table.add_row("Estimated Time", f"{estimated_time_hours:.1f} hours", f"{estimated_time_minutes:.1f} minutes")

        # Efficiency = theoretical minimum calls / actual calls.
        theoretical_minimum_calls = math.ceil(asset_count / bulk_size)
        efficiency = (theoretical_minimum_calls / total_api_calls) * 100
        table.add_row("Efficiency", f"{efficiency:.1f}%", f"{theoretical_minimum_calls} minimum calls")

        console.print(table)

        # Recommendations (from PowerShell optimization experience)
        console.print("\n[blue]💡 Optimization Recommendations:[/blue]")

        if asset_count % (max_parallel * bulk_size) == 0:
            console.print("[green]✅ Perfect mathematical division - optimal configuration![/green]")
        else:
            # Calculate optimal configurations and show the top three.
            optimal_configs = _calculate_optimal_configs(asset_count, bulk_size)
            console.print("[yellow]💡 Consider these optimal configurations:[/yellow]")
            for config in optimal_configs[:3]:
                console.print(f" • {config['parallel']} parallel jobs: {config['efficiency']:.1f}% efficiency")

    except Exception as e:
        from rich.console import Console
        console = Console()
        console.print(f"[red]✗ Error in analyze-performance: {str(e)}[/red]")
2054
+
2055
+
2056
+ # === ENHANCED BULK OPERATION FUNCTIONS ===
2057
+
2058
def _execute_optimized_bulk_delete(ctx, guids, bulk_size, max_parallel, throttle_ms, batch_throttle_ms, dry_run):
    """
    Delete the given GUIDs using parallel bulk-delete worker jobs.

    The GUID list is partitioned into contiguous per-job slices, each handed
    to a thread that issues bulk-size chunks via _delete_batch_job. Returns
    the number of assets reported deleted (or the would-be count in dry-run).
    (Core logic from PowerShell Remove-PurviewAsset-Batch.ps1)
    """
    from rich.console import Console
    from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, TaskProgressColumn
    import concurrent.futures
    import math
    import time

    console = Console()

    if not guids:
        return 0

    total_assets = len(guids)

    if dry_run:
        console.print(f"[yellow]🔍 DRY RUN: Would delete {total_assets} assets[/yellow]")
        return total_assets

    from purviewcli.client._entity import Entity
    entity_client = Entity()

    # Partition into contiguous slices; ceil sizing covers every GUID and
    # never produces more than max_parallel slices.
    slice_size = math.ceil(total_assets / max_parallel)
    job_batches = [guids[offset:offset + slice_size]
                   for offset in range(0, total_assets, slice_size)]

    console.print(f"[blue]🚀 Starting {len(job_batches)} parallel deletion jobs...[/blue]")

    deleted_count = 0

    with Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        BarColumn(),
        TaskProgressColumn(),
        console=console
    ) as progress:

        task = progress.add_task("[red]Deleting assets...", total=total_assets)

        # Fan the slices out to worker threads and fold results back in as
        # each job finishes.
        with concurrent.futures.ThreadPoolExecutor(max_workers=max_parallel) as executor:
            pending = [
                executor.submit(_delete_batch_job, entity_client, slice_, bulk_size, throttle_ms, job_id)
                for job_id, slice_ in enumerate(job_batches)
            ]

            for finished in concurrent.futures.as_completed(pending):
                try:
                    job_deleted = finished.result()
                    deleted_count += job_deleted
                    progress.update(task, advance=job_deleted)

                    # Pause between completed jobs (main-thread throttle)
                    if batch_throttle_ms > 0:
                        time.sleep(batch_throttle_ms / 1000)

                except Exception as e:
                    console.print(f"[red]✗ Batch deletion failed: {str(e)}[/red]")

    return deleted_count
2128
+
2129
+
2130
+ def _delete_batch_job(entity_client, guid_batch, bulk_size, throttle_ms, job_id):
2131
+ """
2132
+ Execute a single batch job (parallel worker function)
2133
+ """
2134
+ import time
2135
+
2136
+ deleted_in_job = 0
2137
+
2138
+ # Split batch into bulk delete chunks
2139
+ for i in range(0, len(guid_batch), bulk_size):
2140
+ bulk_guids = guid_batch[i:i + bulk_size]
2141
+
2142
+ try:
2143
+ # Execute bulk delete API call
2144
+ args = {"--guid": bulk_guids}
2145
+ result = entity_client.entityDeleteBulk(args)
2146
+
2147
+ if result:
2148
+ deleted_in_job += len(bulk_guids)
2149
+
2150
+ # Throttle between API calls
2151
+ if throttle_ms > 0 and i + bulk_size < len(guid_batch):
2152
+ time.sleep(throttle_ms / 1000)
2153
+
2154
+ except Exception as e:
2155
+ from rich.console import Console
2156
+ console = Console()
2157
+ console.print(f"[red]✗ Job {job_id} bulk delete failed: {str(e)}[/red]")
2158
+
2159
+ return deleted_in_job
2160
+
2161
+
2162
def _continuous_collection_deletion(ctx, collection_name, bulk_size, max_parallel, throttle_ms, batch_throttle_ms, dry_run, batch_size=1000):
    """
    Continuous deletion strategy for large collections.

    Repeatedly fetches a batch of asset GUIDs from the collection and deletes
    them until the search returns no assets. Returns the total number of
    assets deleted (or the would-be count in dry-run mode).

    Fix: the previous version could loop forever in non-dry-run mode when
    every deletion attempt failed (search keeps returning the same assets
    while zero are removed); a stall guard now aborts after three
    consecutive zero-progress iterations.
    """
    from rich.console import Console

    console = Console()
    total_deleted = 0
    iteration = 1
    stalled_iterations = 0  # consecutive non-dry-run iterations with zero deletions
    MAX_STALLED_ITERATIONS = 3

    console.print(f"[blue]🔄 Starting continuous deletion for collection: {collection_name}[/blue]")

    while True:
        console.print(f"\n[blue]📅 Iteration {iteration}: Finding assets to delete...[/blue]")

        # Get next batch of assets from collection
        asset_guids = _get_collection_assets_batch(collection_name, batch_size)

        if not asset_guids:
            console.print("[green]✅ No more assets found - collection is clean![/green]")
            break

        found_count = len(asset_guids)
        console.print(f"[blue]📊 Found {found_count} assets in iteration {iteration}[/blue]")

        if dry_run:
            console.print(f"[yellow]🔍 DRY RUN: Would delete {found_count} assets[/yellow]")
            total_deleted += found_count
        else:
            # Execute optimized deletion for this batch
            deleted_in_iteration = _execute_optimized_bulk_delete(
                ctx, asset_guids, bulk_size, max_parallel,
                throttle_ms, batch_throttle_ms, False
            )

            total_deleted += deleted_in_iteration
            console.print(f"[green]✓ Iteration {iteration}: Deleted {deleted_in_iteration}/{found_count} assets[/green]")
            console.print(f"[blue]📈 Running total: {total_deleted} assets deleted[/blue]")

            # Stall guard: abort if we repeatedly find assets but delete none.
            if deleted_in_iteration == 0:
                stalled_iterations += 1
                if stalled_iterations >= MAX_STALLED_ITERATIONS:
                    console.print(f"[red]✗ No progress after {MAX_STALLED_ITERATIONS} consecutive iterations - aborting to avoid an infinite loop[/red]")
                    break
            else:
                stalled_iterations = 0

        iteration += 1

        # Break after reasonable number of iterations in dry-run
        if dry_run and iteration > 5:
            console.print("[yellow]🔍 DRY RUN: Simulated 5 iterations[/yellow]")
            break

    return total_deleted
2209
+
2210
+
2211
+ def _get_collection_assets_batch(collection_name, batch_size):
2212
+ """
2213
+ Get a batch of asset GUIDs from a collection
2214
+ (Would integrate with search API)
2215
+ """
2216
+ # Placeholder - would use search API to get actual asset GUIDs
2217
+ # For testing, return mock data that decreases over iterations
2218
+ import random
2219
+ mock_count = random.randint(0, min(batch_size, 100))
2220
+ return [f"mock-guid-{i}" for i in range(mock_count)]
2221
+
2222
+
2223
+ def _get_collection_asset_count(collection_name):
2224
+ """Get total asset count for a collection"""
2225
+ # Placeholder - would use search API
2226
+ return 1500 # Mock count
2227
+
2228
+
2229
+ def _get_asset_type_breakdown(collection_name):
2230
+ """Get asset count breakdown by type"""
2231
+ # Placeholder - would use search API with type filters
2232
+ return {
2233
+ "DataSet": 450,
2234
+ "Table": 320,
2235
+ "Column": 580,
2236
+ "Process": 150
2237
+ }
2238
+
2239
+
2240
+ def _get_relationship_count(collection_name):
2241
+ """Get relationship count for collection"""
2242
+ # Placeholder - would use relationship API
2243
+ return 2340
2244
+
2245
+
2246
def _display_type_breakdown(type_counts):
    """
    Render the per-type asset counts as a rich table.

    Rows are ordered largest count first, each with its share of the total,
    followed by a bold grand-total row.
    """
    from rich.table import Table
    from rich.console import Console

    grand_total = sum(type_counts.values())

    breakdown = Table(title="Asset Type Breakdown")
    breakdown.add_column("Asset Type", style="cyan")
    breakdown.add_column("Count", style="green")
    breakdown.add_column("Percentage", style="yellow")

    ranked = sorted(type_counts.items(), key=lambda item: item[1], reverse=True)
    for type_name, type_count in ranked:
        # Avoid division by zero when every count is zero.
        share = (type_count / grand_total) * 100 if grand_total > 0 else 0
        breakdown.add_row(type_name, f"{type_count:,}", f"{share:.1f}%")

    breakdown.add_row("[bold]Total[/bold]", f"[bold]{grand_total:,}[/bold]", "[bold]100.0%[/bold]")
    Console().print(breakdown)
2265
+
2266
+
2267
+ def _calculate_optimal_configs(asset_count, bulk_size):
2268
+ """
2269
+ Calculate optimal parallel job configurations
2270
+ (Mathematical optimization from PowerShell)
2271
+ """
2272
+ import math
2273
+
2274
+ configs = []
2275
+
2276
+ for parallel_jobs in range(1, 21): # Test 1-20 parallel jobs
2277
+ assets_per_job = math.ceil(asset_count / parallel_jobs)
2278
+ api_calls_per_job = math.ceil(assets_per_job / bulk_size)
2279
+ total_api_calls = api_calls_per_job * parallel_jobs
2280
+
2281
+ theoretical_minimum = math.ceil(asset_count / bulk_size)
2282
+ efficiency = (theoretical_minimum / total_api_calls) * 100
2283
+
2284
+ configs.append({
2285
+ 'parallel': parallel_jobs,
2286
+ 'efficiency': efficiency,
2287
+ 'total_calls': total_api_calls,
2288
+ 'waste': total_api_calls - theoretical_minimum
2289
+ })
2290
+
2291
+ # Sort by efficiency (descending)
2292
+ return sorted(configs, key=lambda x: x['efficiency'], reverse=True)
2293
+
2294
+
1831
2295
  # Make the entity group available for import
1832
2296
  __all__ = ["entity"]