fabric-pbi 1.0.0 (fabric_pbi-1.0.0-py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fabric_pbi-1.0.0.dist-info/METADATA +124 -0
- fabric_pbi-1.0.0.dist-info/RECORD +17 -0
- fabric_pbi-1.0.0.dist-info/WHEEL +5 -0
- fabric_pbi-1.0.0.dist-info/top_level.txt +1 -0
- fabricpandas/__init__.py +22 -0
- fabricpandas/auth/__init__.py +8 -0
- fabricpandas/auth/base_client.py +220 -0
- fabricpandas/bulks/__init__.py +8 -0
- fabricpandas/bulks/bulk_client.py +733 -0
- fabricpandas/client.py +80 -0
- fabricpandas/report/__init__.py +8 -0
- fabricpandas/report/report_client.py +509 -0
- fabricpandas/semantic_model/__init__.py +8 -0
- fabricpandas/semantic_model/semantic_model_client.py +643 -0
- fabricpandas/utils/__init__.py +6 -0
- fabricpandas/workspace/__init__.py +8 -0
- fabricpandas/workspace/workspace_client.py +167 -0

fabricpandas/bulks/bulk_client.py

@@ -0,0 +1,733 @@

"""
Bulk Operations Client for Microsoft Fabric

Provides bulk operations across multiple workspaces and items.
"""

import re
import time
import requests
from typing import Optional, Dict, Any, List, Callable
from concurrent.futures import ThreadPoolExecutor, as_completed
from ..auth import BaseClient
from ..semantic_model import SemanticModelClient
from ..report import ReportClient


class BulkClient(BaseClient):
    """
    Client for performing bulk operations across workspaces and items

    Provides methods to:
    - Get all workspaces
    - Get all semantic models across workspaces
    - Get all reports across workspaces
    - Download all definitions in bulk
    - Update multiple items in bulk
    """

    def __init__(
        self,
        access_token: Optional[str] = None,
        base_url: str = "https://api.fabric.microsoft.com/v1",
        env_file: str = ".env",
        max_workers: int = 5
    ):
        """
        Initialize Bulk Client

        Args:
            access_token: Bearer token for authentication
            base_url: Base URL for the Fabric API
            env_file: Path to the .env file
            max_workers: Maximum number of parallel workers for bulk operations (default: 5)
        """
        super().__init__(access_token, base_url, env_file)
        self.max_workers = max_workers

        # Initialize specialized clients
        self.semantic_model_client = SemanticModelClient(
            access_token=self.config.access_token,
            base_url=base_url
        )
        self.report_client = ReportClient(
            access_token=self.config.access_token,
            base_url=base_url
        )

    def _download_with_retry(
        self,
        download_func: Callable,
        max_retries: int = 5,
        initial_delay: int = 2,
        **kwargs
    ) -> Dict[str, Any]:
        """
        Download with exponential backoff retry on 429 errors

        Args:
            download_func: Function to call for the download
            max_retries: Maximum retry attempts (default: 5)
            initial_delay: Initial delay in seconds (default: 2)
            **kwargs: Arguments to pass to download_func

        Returns:
            Result from download_func

        Raises:
            Exception: If max retries are exceeded or a non-recoverable error occurs
        """
        delay = initial_delay

        for attempt in range(max_retries):
            try:
                return download_func(**kwargs)

            except requests.exceptions.HTTPError as e:
                if e.response.status_code == 429:
                    # Honor the Retry-After header if the server provided one
                    retry_after = e.response.headers.get("Retry-After")
                    if retry_after:
                        try:
                            delay = int(retry_after)
                        except ValueError:
                            pass

                    if attempt < max_retries - 1:
                        print(f"  ⚠️ Rate limited (429). Waiting {delay}s before retry {attempt + 1}/{max_retries}...")
                        time.sleep(delay)
                        # Exponential backoff for the next retry
                        delay = min(delay * 2, 60)  # Cap at 60 seconds
                        continue
                    else:
                        # Max retries exceeded
                        raise Exception(f"429 Client Error: max retries exceeded for url: {e.response.url}")
                else:
                    # Non-429 HTTP errors are not retried
                    raise

        raise Exception(f"Failed after {max_retries} retries")
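
    # The bulk download methods below funnel each per-item call through this
    # helper. A minimal sketch of the pattern (IDs are placeholders):
    #
    #   result = self._download_with_retry(
    #       self.semantic_model_client.get_semantic_model_definition,
    #       max_retries=3,
    #       workspace_id="<workspace-guid>",
    #       semantic_model_id="<model-guid>",
    #   )
    #
    # With initial_delay=2 and no Retry-After header, the waits between
    # attempts grow as 2s, 4s, 8s, ... and are capped at 60 seconds.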

    def list_workspaces(
        self,
        continuation_token: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        List all workspaces

        Reference: https://learn.microsoft.com/en-us/rest/api/fabric/core/workspaces/list-workspaces

        Args:
            continuation_token: Optional token for pagination

        Returns:
            Dictionary with the workspace list
        """
        endpoint = "workspaces"
        params = {}

        if continuation_token:
            params["continuationToken"] = continuation_token

        response = self._make_request("GET", endpoint, params=params)
        return response.json()

    def get_all_workspaces(self) -> List[Dict[str, Any]]:
        """
        Get all workspaces with automatic pagination

        Returns:
            List of all workspaces
        """
        all_workspaces = []
        continuation_token = None

        print("Fetching all workspaces...")

        while True:
            result = self.list_workspaces(continuation_token=continuation_token)
            workspaces = result.get("value", [])
            all_workspaces.extend(workspaces)

            print(f"  Retrieved {len(workspaces)} workspaces (Total: {len(all_workspaces)})")

            # Check if there are more pages
            continuation_token = result.get("continuationToken")
            if not continuation_token:
                break

        print(f"✓ Total workspaces found: {len(all_workspaces)}\n")
        return all_workspaces
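
    # The pagination contract assumed above matches the List Workspaces
    # response as used throughout this file: a "value" array of workspace
    # objects (each carrying at least "id" and "displayName") plus a
    # "continuationToken" that is present only while more pages remain.
    # One page, truncated:
    #
    #   {
    #     "value": [{"id": "<guid>", "displayName": "Sales", ...}],
    #     "continuationToken": "<opaque-token>"
    #   }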

    def get_all_semantic_models(
        self,
        workspace_ids: Optional[List[str]] = None,
        include_workspace_info: bool = True
    ) -> List[Dict[str, Any]]:
        """
        Get all semantic models across the specified workspaces, or across all workspaces

        Args:
            workspace_ids: List of workspace IDs (if None, fetches from all workspaces)
            include_workspace_info: If True, includes workspace info with each model

        Returns:
            List of all semantic models with optional workspace info
        """
        # Get workspaces if not provided
        if workspace_ids is None:
            workspaces = self.get_all_workspaces()
            workspace_ids = [w["id"] for w in workspaces]
            workspace_map = {w["id"]: w for w in workspaces}
        else:
            workspace_map = {}

        all_models = []

        print(f"Fetching semantic models from {len(workspace_ids)} workspace(s)...\n")

        for idx, workspace_id in enumerate(workspace_ids, 1):
            try:
                print(f"[{idx}/{len(workspace_ids)}] Workspace: {workspace_id}")
                result = self.semantic_model_client.list_semantic_models(workspace_id)
                models = result.get("value", [])

                # Add workspace info if requested
                if include_workspace_info and workspace_id in workspace_map:
                    for model in models:
                        model["_workspace"] = workspace_map[workspace_id]
                elif include_workspace_info:
                    for model in models:
                        model["_workspace_id"] = workspace_id

                all_models.extend(models)
                print(f"  ✓ Found {len(models)} model(s)\n")

            except Exception as e:
                print(f"  ✗ Error: {e}\n")
                continue

        print(f"{'=' * 60}")
        print(f"Total semantic models found: {len(all_models)}")
        print(f"{'=' * 60}\n")

        return all_models
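
    # Usage sketch (workspace IDs are placeholders): scan a known subset and
    # read back the workspace annotation attached to each model.
    #
    #   models = client.get_all_semantic_models(
    #       workspace_ids=["<ws-guid-1>", "<ws-guid-2>"],
    #       include_workspace_info=True,
    #   )
    #   for m in models:
    #       ws = m.get("_workspace_id") or m.get("_workspace", {}).get("id")
    #       print(m["displayName"], ws)
    #
    # Note that when workspace_ids is passed explicitly, only the lighter
    # "_workspace_id" key is attached, because the full workspace objects are
    # never fetched in that path.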

    def get_all_reports(
        self,
        workspace_ids: Optional[List[str]] = None,
        include_workspace_info: bool = True
    ) -> List[Dict[str, Any]]:
        """
        Get all reports across the specified workspaces, or across all workspaces

        Args:
            workspace_ids: List of workspace IDs (if None, fetches from all workspaces)
            include_workspace_info: If True, includes workspace info with each report

        Returns:
            List of all reports with optional workspace info
        """
        # Get workspaces if not provided
        if workspace_ids is None:
            workspaces = self.get_all_workspaces()
            workspace_ids = [w["id"] for w in workspaces]
            workspace_map = {w["id"]: w for w in workspaces}
        else:
            workspace_map = {}

        all_reports = []

        print(f"Fetching reports from {len(workspace_ids)} workspace(s)...\n")

        for idx, workspace_id in enumerate(workspace_ids, 1):
            try:
                print(f"[{idx}/{len(workspace_ids)}] Workspace: {workspace_id}")
                result = self.report_client.list_reports(workspace_id)
                reports = result.get("value", [])

                # Add workspace info if requested
                if include_workspace_info and workspace_id in workspace_map:
                    for report in reports:
                        report["_workspace"] = workspace_map[workspace_id]
                elif include_workspace_info:
                    for report in reports:
                        report["_workspace_id"] = workspace_id

                all_reports.extend(reports)
                print(f"  ✓ Found {len(reports)} report(s)\n")

            except Exception as e:
                print(f"  ✗ Error: {e}\n")
                continue

        print(f"{'=' * 60}")
        print(f"Total reports found: {len(all_reports)}")
        print(f"{'=' * 60}\n")

        return all_reports
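
    # get_all_reports mirrors get_all_semantic_models one-for-one, e.g.:
    #   reports = client.get_all_reports(workspace_ids=["<ws-guid>"])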

    def download_all_semantic_model_definitions(
        self,
        workspace_ids: Optional[List[str]] = None,
        output_base_folder: str = "bulk_semantic_models",
        use_model_id_as_folder: bool = True,
        organize_by_workspace: bool = True,
        parallel: bool = True,
        max_workers: int = 2,
        max_retries: int = 5,
        rate_limit_delay: float = 0.5,
        retry_failed_after: int = 300,
        max_retry_rounds: int = 2
    ) -> Dict[str, Any]:
        """
        Download all semantic model definitions across workspaces, with automatic retry rounds

        Args:
            workspace_ids: List of workspace IDs (if None, fetches from all workspaces)
            output_base_folder: Base folder for output (default: "bulk_semantic_models")
            use_model_id_as_folder: Use the model ID instead of its name for the folder (default: True)
            organize_by_workspace: Create a subfolder for each workspace (default: True)
            parallel: Download in parallel (default: True)
            max_workers: Number of parallel workers (default: 2, kept low to avoid rate limits)
            max_retries: Maximum retry attempts on 429 errors per download (default: 5)
            rate_limit_delay: Delay between requests in seconds (default: 0.5)
            retry_failed_after: Seconds to wait before retrying failed downloads (default: 300 = 5 minutes)
            max_retry_rounds: Maximum number of download rounds, including the first pass (default: 2)

        Returns:
            Dictionary with download statistics and results
        """
        # Get all semantic models
        models = self.get_all_semantic_models(workspace_ids, include_workspace_info=True)

        results = {
            "total": len(models),
            "successful": 0,
            "failed": 0,
            "downloads": []
        }

        print(f"\n{'=' * 60}")
        print(f"Starting bulk download of {len(models)} semantic model(s)")
        print(f"{'=' * 60}\n")

        def download_single_model(model_info):
            """Download a single model definition with retry logic"""
            workspace_id = model_info.get("_workspace_id") or model_info.get("_workspace", {}).get("id")
            model_id = model_info["id"]
            model_name = model_info.get("displayName", model_id)

            # Space out requests to avoid rate limiting
            time.sleep(rate_limit_delay)

            # Determine the output folder
            if organize_by_workspace:
                workspace_name = model_info.get("_workspace", {}).get("displayName", workspace_id)
                workspace_folder = re.sub(r'[<>:"/\\|?*]', "_", workspace_name)
                output_folder = f"{output_base_folder}/{workspace_folder}"
            else:
                output_folder = output_base_folder

            try:
                # Download with retry logic for 429 errors
                result = self._download_with_retry(
                    self.semantic_model_client.get_semantic_model_definition,
                    max_retries=max_retries,
                    workspace_id=workspace_id,
                    semantic_model_id=model_id,
                    save_to_folder=True,
                    output_folder=output_folder,
                    use_model_id_as_folder=use_model_id_as_folder
                )

                print(f"  ✓ Downloaded: {model_name} ({len(result.get('saved_files', []))} files)")

                return {
                    "status": "success",
                    "model_id": model_id,
                    "model_name": model_name,
                    "workspace_id": workspace_id,
                    "output_folder": result.get("output_folder"),
                    "files_count": len(result.get("saved_files", []))
                }
            except Exception as e:
                print(f"  ✗ Failed: {model_name} - {str(e)}")
                return {
                    "status": "failed",
                    "model_id": model_id,
                    "model_name": model_name,
                    "workspace_id": workspace_id,
                    "error": str(e)
                }

        # Process downloads with retry rounds
        models_to_process = models.copy()
        retry_round = 0
        all_results = []

        while models_to_process and retry_round < max_retry_rounds:
            retry_round += 1

            if retry_round > 1:
                # The retried models were counted as failed in the previous
                # round; remove them so the final counters reflect end state.
                results["failed"] -= len(models_to_process)
                print(f"\n{'=' * 60}")
                print(f"🔄 RETRY ROUND {retry_round}: Retrying {len(models_to_process)} failed model(s)")
                print(f"⏰ Waiting {retry_failed_after} seconds before retry...")
                print(f"{'=' * 60}\n")
                time.sleep(retry_failed_after)

            round_results = []

            # Download models (parallel or sequential)
            if parallel and len(models_to_process) > 1:
                print(f"⚡ Downloading with {max_workers} parallel workers...\n")
                with ThreadPoolExecutor(max_workers=max_workers) as executor:
                    futures = {executor.submit(download_single_model, model): model for model in models_to_process}

                    for future in as_completed(futures):
                        download_result = future.result()
                        round_results.append(download_result)

                        if download_result["status"] == "success":
                            results["successful"] += 1
                        else:
                            results["failed"] += 1

                        # Progress update
                        completed = results["successful"] + results["failed"]
                        print(f"Progress: {completed}/{results['total']} ({results['successful']} ✓, {results['failed']} ✗)")
            else:
                print("📥 Downloading sequentially...\n")
                for idx, model in enumerate(models_to_process, 1):
                    print(f"[{idx}/{len(models_to_process)}] Downloading {model.get('displayName', model['id'])}...")
                    download_result = download_single_model(model)
                    round_results.append(download_result)

                    if download_result["status"] == "success":
                        results["successful"] += 1
                    else:
                        results["failed"] += 1

            # Collect failed models for the next round
            failed_models = []
            for result in round_results:
                all_results.append(result)

                if result["status"] == "failed":
                    # Find the model info to retry
                    model_id = result["model_id"]
                    for model in models_to_process:
                        if model["id"] == model_id:
                            failed_models.append(model)
                            break

            # Prepare for the next round
            models_to_process = failed_models

            # Summary for this round
            round_success = sum(1 for r in round_results if r["status"] == "success")
            round_failed = sum(1 for r in round_results if r["status"] == "failed")

            print(f"\n{'=' * 60}")
            print(f"Round {retry_round} Summary:")
            print(f"  Processed: {len(round_results)}")
            print(f"  Successful: {round_success}")
            print(f"  Failed: {round_failed}")

            if models_to_process and retry_round < max_retry_rounds:
                print(f"\n⚠️ {len(models_to_process)} model(s) will be retried in the next round")
            elif models_to_process:
                print(f"\n⚠️ {len(models_to_process)} model(s) failed after {retry_round} rounds")
            print(f"{'=' * 60}\n")

        # Store all results
        results["downloads"] = all_results
        results["retry_rounds"] = retry_round

        print(f"\n{'=' * 60}")
        print("✅ FINAL BULK DOWNLOAD SUMMARY")
        print(f"{'=' * 60}")
        print(f"  Total Models: {results['total']}")
        print(f"  Successful: {results['successful']}")
        print(f"  Failed: {results['failed']}")
        print(f"  Retry Rounds: {retry_round}")
        print(f"{'=' * 60}\n")

        # Show failed models, if any, skipping those that succeeded on a retry
        if results["failed"] > 0:
            print("❌ Failed Models:")
            succeeded_ids = {r["model_id"] for r in all_results if r["status"] == "success"}
            for result in all_results:
                if result["status"] == "failed" and result["model_id"] not in succeeded_ids:
                    print(f"  - {result['model_name']} ({result['model_id']})")
                    print(f"    Error: {result['error']}")
            print()

        return results
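
    # A conservative invocation sketch for a large tenant (the values shown
    # are suggestions, not library defaults): fewer workers plus a longer
    # inter-request delay trade speed for fewer 429 retry rounds.
    #
    #   results = client.download_all_semantic_model_definitions(
    #       output_base_folder="backup/semantic_models",
    #       max_workers=1,
    #       rate_limit_delay=1.0,
    #       retry_failed_after=120,
    #   )
    #   still_failed = [d for d in results["downloads"] if d["status"] == "failed"]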

    def download_all_report_definitions(
        self,
        workspace_ids: Optional[List[str]] = None,
        output_base_folder: str = "bulk_reports",
        use_report_id_as_folder: bool = True,
        organize_by_workspace: bool = True,
        parallel: bool = True,
        max_workers: int = 2,
        max_retries: int = 5,
        rate_limit_delay: float = 0.5,
        retry_failed_after: int = 300,
        max_retry_rounds: int = 2
    ) -> Dict[str, Any]:
        """
        Download all report definitions across workspaces, with automatic retry rounds

        Args:
            workspace_ids: List of workspace IDs (if None, fetches from all workspaces)
            output_base_folder: Base folder for output (default: "bulk_reports")
            use_report_id_as_folder: Use the report ID instead of its name for the folder (default: True)
            organize_by_workspace: Create a subfolder for each workspace (default: True)
            parallel: Download in parallel (default: True)
            max_workers: Number of parallel workers (default: 2, kept low to avoid rate limits)
            max_retries: Maximum retry attempts on 429 errors per download (default: 5)
            rate_limit_delay: Delay between requests in seconds (default: 0.5)
            retry_failed_after: Seconds to wait before retrying failed downloads (default: 300 = 5 minutes)
            max_retry_rounds: Maximum number of download rounds, including the first pass (default: 2)

        Returns:
            Dictionary with download statistics and results
        """
        # Get all reports
        reports = self.get_all_reports(workspace_ids, include_workspace_info=True)

        results = {
            "total": len(reports),
            "successful": 0,
            "failed": 0,
            "downloads": []
        }

        print(f"\n{'=' * 60}")
        print(f"Starting bulk download of {len(reports)} report(s)")
        print(f"{'=' * 60}\n")

        def download_single_report(report_info):
            """Download a single report definition with retry logic"""
            workspace_id = report_info.get("_workspace_id") or report_info.get("_workspace", {}).get("id")
            report_id = report_info["id"]
            report_name = report_info.get("displayName", report_id)

            # Space out requests to avoid rate limiting
            time.sleep(rate_limit_delay)

            # Determine the output folder
            if organize_by_workspace:
                workspace_name = report_info.get("_workspace", {}).get("displayName", workspace_id)
                workspace_folder = re.sub(r'[<>:"/\\|?*]', "_", workspace_name)
                output_folder = f"{output_base_folder}/{workspace_folder}"
            else:
                output_folder = output_base_folder

            try:
                # Download with retry logic for 429 errors
                result = self._download_with_retry(
                    self.report_client.get_report_definition,
                    max_retries=max_retries,
                    workspace_id=workspace_id,
                    report_id=report_id,
                    save_to_folder=True,
                    output_folder=output_folder,
                    use_report_id_as_folder=use_report_id_as_folder
                )

                print(f"  ✓ Downloaded: {report_name} ({len(result.get('saved_files', []))} files)")

                return {
                    "status": "success",
                    "report_id": report_id,
                    "report_name": report_name,
                    "workspace_id": workspace_id,
                    "output_folder": result.get("output_folder"),
                    "files_count": len(result.get("saved_files", []))
                }
            except Exception as e:
                print(f"  ✗ Failed: {report_name} - {str(e)}")
                return {
                    "status": "failed",
                    "report_id": report_id,
                    "report_name": report_name,
                    "workspace_id": workspace_id,
                    "error": str(e)
                }

        # Process downloads with retry rounds
        reports_to_process = reports.copy()
        retry_round = 0
        all_results = []

        while reports_to_process and retry_round < max_retry_rounds:
            retry_round += 1

            if retry_round > 1:
                # The retried reports were counted as failed in the previous
                # round; remove them so the final counters reflect end state.
                results["failed"] -= len(reports_to_process)
                print(f"\n{'=' * 60}")
                print(f"🔄 RETRY ROUND {retry_round}: Retrying {len(reports_to_process)} failed report(s)")
                print(f"⏰ Waiting {retry_failed_after} seconds before retry...")
                print(f"{'=' * 60}\n")
                time.sleep(retry_failed_after)

            round_results = []

            # Download reports (parallel or sequential)
            if parallel and len(reports_to_process) > 1:
                print(f"⚡ Downloading with {max_workers} parallel workers...\n")
                with ThreadPoolExecutor(max_workers=max_workers) as executor:
                    futures = {executor.submit(download_single_report, report): report for report in reports_to_process}

                    for future in as_completed(futures):
                        download_result = future.result()
                        round_results.append(download_result)

                        if download_result["status"] == "success":
                            results["successful"] += 1
                        else:
                            results["failed"] += 1

                        # Progress update
                        completed = results["successful"] + results["failed"]
                        print(f"Progress: {completed}/{results['total']} ({results['successful']} ✓, {results['failed']} ✗)")
            else:
                print("📥 Downloading sequentially...\n")
                for idx, report in enumerate(reports_to_process, 1):
                    print(f"[{idx}/{len(reports_to_process)}] Downloading {report.get('displayName', report['id'])}...")
                    download_result = download_single_report(report)
                    round_results.append(download_result)

                    if download_result["status"] == "success":
                        results["successful"] += 1
                    else:
                        results["failed"] += 1

            # Collect failed reports for the next round
            failed_reports = []
            for result in round_results:
                all_results.append(result)

                if result["status"] == "failed":
                    # Find the report info to retry
                    report_id = result["report_id"]
                    for report in reports_to_process:
                        if report["id"] == report_id:
                            failed_reports.append(report)
                            break

            # Prepare for the next round
            reports_to_process = failed_reports

            # Summary for this round
            round_success = sum(1 for r in round_results if r["status"] == "success")
            round_failed = sum(1 for r in round_results if r["status"] == "failed")

            print(f"\n{'=' * 60}")
            print(f"Round {retry_round} Summary:")
            print(f"  Processed: {len(round_results)}")
            print(f"  Successful: {round_success}")
            print(f"  Failed: {round_failed}")

            if reports_to_process and retry_round < max_retry_rounds:
                print(f"\n⚠️ {len(reports_to_process)} report(s) will be retried in the next round")
            elif reports_to_process:
                print(f"\n⚠️ {len(reports_to_process)} report(s) failed after {retry_round} rounds")
            print(f"{'=' * 60}\n")

        # Store all results
        results["downloads"] = all_results
        results["retry_rounds"] = retry_round

        print(f"\n{'=' * 60}")
        print("✅ FINAL BULK DOWNLOAD SUMMARY")
        print(f"{'=' * 60}")
        print(f"  Total Reports: {results['total']}")
        print(f"  Successful: {results['successful']}")
        print(f"  Failed: {results['failed']}")
        print(f"  Retry Rounds: {retry_round}")
        print(f"{'=' * 60}\n")

        # Show failed reports, if any, skipping those that succeeded on a retry
        if results["failed"] > 0:
            print("❌ Failed Reports:")
            succeeded_ids = {r["report_id"] for r in all_results if r["status"] == "success"}
            for result in all_results:
                if result["status"] == "failed" and result["report_id"] not in succeeded_ids:
                    print(f"  - {result['report_name']} ({result['report_id']})")
                    print(f"    Error: {result['error']}")
            print()

        return results
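
    # download_all_report_definitions is the report-side twin of the bulk
    # semantic model download above and returns the same statistics dictionary
    # (total/successful/failed/downloads/retry_rounds).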

    def download_all_definitions(
        self,
        workspace_ids: Optional[List[str]] = None,
        output_base_folder: str = "bulk_fabric_items",
        include_semantic_models: bool = True,
        include_reports: bool = True,
        organize_by_workspace: bool = True,
        parallel: bool = True
    ) -> Dict[str, Any]:
        """
        Download all semantic models and reports across workspaces

        Args:
            workspace_ids: List of workspace IDs (if None, fetches from all workspaces)
            output_base_folder: Base folder for output
            include_semantic_models: Include semantic models (default: True)
            include_reports: Include reports (default: True)
            organize_by_workspace: Create a subfolder for each workspace (default: True)
            parallel: Download in parallel (default: True)

        Returns:
            Dictionary with combined download statistics
        """
        results = {
            "semantic_models": None,
            "reports": None
        }

        # With organize_by_workspace=True, models and reports share the same
        # per-workspace folders under output_base_folder; otherwise they are
        # split into dedicated semantic_models/ and reports/ subfolders.
        if include_semantic_models:
            sm_folder = f"{output_base_folder}/semantic_models" if not organize_by_workspace else output_base_folder
            results["semantic_models"] = self.download_all_semantic_model_definitions(
                workspace_ids=workspace_ids,
                output_base_folder=sm_folder,
                organize_by_workspace=organize_by_workspace,
                parallel=parallel
            )

        if include_reports:
            report_folder = f"{output_base_folder}/reports" if not organize_by_workspace else output_base_folder
            results["reports"] = self.download_all_report_definitions(
                workspace_ids=workspace_ids,
                output_base_folder=report_folder,
                organize_by_workspace=organize_by_workspace,
                parallel=parallel
            )

        return results
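
    # Combined-run sketch: each enabled item type carries its own statistics
    # dictionary, so a quick health check might look like:
    #
    #   combined = client.download_all_definitions(output_base_folder="backup")
    #   for kind, stats in combined.items():
    #       if stats:
    #           print(f"{kind}: {stats['successful']} of {stats['total']} downloaded")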


# Example usage
if __name__ == "__main__":
    print("BulkClient - Bulk operations for Microsoft Fabric")
    print("\nExample usage:")
    print("  from fabricpandas.bulks.bulk_client import BulkClient")
    print("  client = BulkClient()")
    print()
    print("  # Get all workspaces")
    print("  workspaces = client.get_all_workspaces()")
    print()
    print("  # Get all semantic models")
    print("  models = client.get_all_semantic_models()")
    print()
    print("  # Download all definitions")
    print("  results = client.download_all_definitions()")