duckrun 0.2.9.dev5__py3-none-any.whl → 0.2.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- duckrun/core.py +162 -8
- duckrun/semantic_model.py +309 -22
- duckrun/stats.py +14 -7
- {duckrun-0.2.9.dev5.dist-info → duckrun-0.2.10.dist-info}/METADATA +41 -3
- duckrun-0.2.10.dist-info/RECORD +14 -0
- duckrun-0.2.9.dev5.dist-info/RECORD +0 -14
- {duckrun-0.2.9.dev5.dist-info → duckrun-0.2.10.dist-info}/WHEEL +0 -0
- {duckrun-0.2.9.dev5.dist-info → duckrun-0.2.10.dist-info}/licenses/LICENSE +0 -0
- {duckrun-0.2.9.dev5.dist-info → duckrun-0.2.10.dist-info}/top_level.txt +0 -0
duckrun/core.py
CHANGED
```diff
@@ -2,6 +2,8 @@ import duckdb
 import requests
 import os
 import importlib.util
+import json
+import time
 from deltalake import DeltaTable, write_deltalake
 from typing import List, Tuple, Union, Optional, Callable, Dict, Any
 from string import Template
```
```diff
@@ -702,8 +704,11 @@ class Duckrun:
         Deploy a semantic model from a BIM file using DirectLake mode.
 
         Args:
-            bim_url:
-
+            bim_url: Can be:
+                - URL: "https://raw.githubusercontent.com/.../model.bim"
+                - Local file: "model.bim"
+                - Workspace/Model: "workspace_name/model_name"
+            dataset_name: Name for the semantic model (default: source model name if workspace/model format, else lakehouse_schema)
             wait_seconds: Seconds to wait for permission propagation (default: 5)
 
         Returns:
```
```diff
@@ -712,18 +717,28 @@
         Examples:
             dr = Duckrun.connect("My Workspace/My Lakehouse.lakehouse/dbo")
 
-            # Deploy
-            dr.deploy("
+            # Deploy from workspace/model (uses same name by default)
+            dr.deploy("Source Workspace/Source Model")  # Creates "Source Model"
 
             # Deploy with custom name
-            dr.deploy("
-
+            dr.deploy("Source Workspace/Source Model", dataset_name="Sales Model Copy")
+
+            # Deploy from URL or local file
+            dr.deploy("https://raw.githubusercontent.com/.../model.bim", dataset_name="My Model")
         """
         from .semantic_model import deploy_semantic_model
 
         # Auto-generate dataset name if not provided
         if dataset_name is None:
-
+            # If using workspace/model format, use the model name
+            if "/" in bim_url and not bim_url.startswith(('http://', 'https://')):
+                parts = bim_url.split("/")
+                if len(parts) == 2:
+                    dataset_name = parts[1]  # Use the model name
+                else:
+                    dataset_name = f"{self.lakehouse_name}_{self.schema}"
+            else:
+                dataset_name = f"{self.lakehouse_name}_{self.schema}"
 
         # Call the deployment function (DirectLake only)
         return deploy_semantic_model(
```
```diff
@@ -731,7 +746,7 @@ class Duckrun:
             lakehouse_name_or_id=self.lakehouse_name,
             schema_name=self.schema,
             dataset_name=dataset_name,
-
+            bim_url_or_path=bim_url,
             wait_seconds=wait_seconds
         )
 
```
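Taken together, the hunks above change how `deploy()` picks a default `dataset_name` based on the source form. A minimal usage sketch under the new behavior (workspace, lakehouse, and model names are placeholders):

```python
import duckrun

dr = duckrun.connect("My Workspace/My Lakehouse.lakehouse/dbo")

# "workspace/model" source: the default dataset name is the source model's name
dr.deploy("Source Workspace/Sales Model")        # creates "Sales Model"

# URL or local-file source: the default falls back to f"{lakehouse_name}_{schema}"
dr.deploy("model.bim")                           # creates "My Lakehouse_dbo"

# An explicit dataset_name always wins
dr.deploy("model.bim", dataset_name="My Model")
```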
```diff
@@ -864,6 +879,145 @@ class WorkspaceConnection:
             print(f"❌ Error creating lakehouse '{lakehouse_name}': {e}")
             return False
 
+    def download_bim(self, semantic_model_name: str, output_path: Optional[str] = None) -> Optional[str]:
+        """
+        Download a semantic model as a BIM (Business Intelligence Model) file.
+
+        Args:
+            semantic_model_name: Name of the semantic model to download
+            output_path: Optional path to save the BIM file. If not provided, returns the BIM content as JSON string
+
+        Returns:
+            BIM content as JSON string if output_path is None, or the file path if saved successfully
+
+        Example:
+            con = duckrun.connect("My Workspace")
+            # Get BIM content as string
+            bim_content = con.download_bim("Sales Model")
+            # Or save to file
+            con.download_bim("Sales Model", "sales_model.bim")
+        """
+        try:
+            # Get authentication token
+            from .auth import get_fabric_api_token
+            token = get_fabric_api_token()
+            if not token:
+                print("❌ Failed to authenticate for downloading semantic model")
+                return None
+
+            # Resolve workspace name to ID
+            workspace_id = self._get_workspace_id_by_name(token, self.workspace_name)
+            if not workspace_id:
+                print(f"❌ Workspace '{self.workspace_name}' not found")
+                return None
+
+            # Get semantic model ID
+            print(f"🔍 Looking for semantic model '{semantic_model_name}'...")
+            url = f"https://api.fabric.microsoft.com/v1/workspaces/{workspace_id}/semanticModels"
+            headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
+
+            response = requests.get(url, headers=headers)
+            response.raise_for_status()
+
+            models = response.json().get("value", [])
+            model = next((m for m in models if m.get("displayName") == semantic_model_name), None)
+
+            if not model:
+                print(f"❌ Semantic model '{semantic_model_name}' not found in workspace '{self.workspace_name}'")
+                return None
+
+            model_id = model.get("id")
+            print(f"✓ Found semantic model: {semantic_model_name} (ID: {model_id})")
+
+            # Get the model definition using the generic items API
+            print("📥 Downloading BIM definition...")
+            definition_url = f"https://api.fabric.microsoft.com/v1/workspaces/{workspace_id}/items/{model_id}/getDefinition"
+
+            # POST request to get definition with TMSL format (which includes model.bim)
+            # Note: format parameter should be in query string, not body
+            response = requests.post(f"{definition_url}?format=TMSL", headers=headers)
+            response.raise_for_status()
+
+            # Handle long-running operation if needed
+            if response.status_code == 202:
+                operation_id = response.headers.get('x-ms-operation-id')
+                print(f"   Waiting for operation to complete...")
+
+                max_attempts = 30
+                for attempt in range(max_attempts):
+                    time.sleep(2)
+
+                    # Get operation result
+                    result_url = f"https://api.fabric.microsoft.com/v1/operations/{operation_id}/result"
+                    result_response = requests.get(result_url, headers=headers)
+
+                    # Check operation status
+                    status_url = f"https://api.fabric.microsoft.com/v1/operations/{operation_id}"
+                    status_response = requests.get(status_url, headers=headers)
+                    status = status_response.json().get('status')
+
+                    if status == 'Succeeded':
+                        result_data = result_response.json()
+                        break
+                    elif status == 'Failed':
+                        error = status_response.json().get('error', {})
+                        print(f"❌ Operation failed: {error.get('message')}")
+                        return None
+                    elif attempt == max_attempts - 1:
+                        print("❌ Operation timed out")
+                        return None
+            else:
+                result_data = response.json()
+
+            # Extract BIM content from definition
+            definition = result_data.get('definition', {})
+            parts = definition.get('parts', [])
+
+            # Debug: show what parts we have
+            if not parts:
+                print("❌ No definition parts found in response")
+                print(f"   Result data keys: {list(result_data.keys())}")
+                print(f"   Definition keys: {list(definition.keys()) if definition else 'None'}")
+                return None
+
+            print(f"   Found {len(parts)} definition parts:")
+            for part in parts:
+                print(f"   - {part.get('path', 'unknown')}")
+
+            bim_part = next((p for p in parts if p.get('path', '').endswith('.bim')), None)
+            if not bim_part:
+                print("❌ No BIM file found in semantic model definition")
+                print(f"   Looking for files ending with '.bim', found: {[p.get('path') for p in parts]}")
+                return None
+
+            # Decode the BIM content (it's base64 encoded)
+            import base64
+            bim_payload = bim_part.get('payload', '')
+            bim_content = base64.b64decode(bim_payload).decode('utf-8')
+            bim_json = json.loads(bim_content)
+
+            # Format as pretty JSON
+            bim_formatted = json.dumps(bim_json, indent=2)
+
+            print(f"✓ BIM file downloaded successfully")
+            print(f"   - Tables: {len(bim_json.get('model', {}).get('tables', []))}")
+            print(f"   - Relationships: {len(bim_json.get('model', {}).get('relationships', []))}")
+
+            # Save to file or return content
+            if output_path:
+                with open(output_path, 'w', encoding='utf-8') as f:
+                    f.write(bim_formatted)
+                print(f"✓ Saved to: {output_path}")
+                return output_path
+            else:
+                return bim_formatted
+
+        except Exception as e:
+            print(f"❌ Error downloading semantic model: {e}")
+            import traceback
+            traceback.print_exc()
+            return None
+
     def _get_workspace_id_by_name(self, token: str, workspace_name: str) -> Optional[str]:
         """Helper method to get workspace ID from name"""
         try:
```
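Per its docstring, the new `download_bim` method either returns the BIM as a string or writes it to disk. A short usage sketch (workspace and model names are placeholders):

```python
import duckrun

# Workspace-level connection (no lakehouse path)
con = duckrun.connect("My Workspace")

# Returns the model as a pretty-printed JSON string...
bim_text = con.download_bim("Sales Model")

# ...or writes it to a file and returns the path
path = con.download_bim("Sales Model", "sales_model.bim")
```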
duckrun/semantic_model.py
CHANGED
```diff
@@ -186,13 +186,107 @@ def refresh_dataset(dataset_name, workspace_id, client, dataset_id=None):
     response.raise_for_status()
 
 
-def download_bim_from_github(
-    """
-
-
-
-
-
+def download_bim_from_github(url_or_path):
+    """
+    Load BIM file from URL, local file path, or workspace/model format.
+
+    Args:
+        url_or_path: Can be:
+            - Local file path: "model.bim"
+            - URL: "https://..."
+            - Workspace/Model: "workspace_name/semantic_model_name"
+
+    Returns:
+        BIM content as dictionary
+    """
+    import os
+    import tempfile
+
+    # Check if it's a local file path
+    if os.path.exists(url_or_path):
+        print(f"Loading BIM file from local path...")
+        with open(url_or_path, 'r', encoding='utf-8') as f:
+            bim_content = json.load(f)
+        print(f"✓ BIM file loaded from: {url_or_path}")
+    # Check if it's a URL
+    elif url_or_path.startswith(('http://', 'https://')):
+        print(f"Downloading BIM file from URL...")
+        response = requests.get(url_or_path)
+        response.raise_for_status()
+        bim_content = response.json()
+        print(f"✓ BIM file downloaded from URL")
+    # Check if it's workspace/model format
+    elif "/" in url_or_path and not os.path.exists(url_or_path):
+        print(f"Downloading BIM from workspace/model...")
+        parts = url_or_path.split("/")
+        if len(parts) != 2:
+            raise ValueError(f"Invalid workspace/model format: '{url_or_path}'. Expected: 'workspace_name/model_name'")
+
+        ws_name, model_name = parts
+
+        # Download BIM from the semantic model
+        client = FabricRestClient()
+        ws_id = get_workspace_id(ws_name, client)
+
+        # Get semantic model ID
+        response = client.get(f"/v1/workspaces/{ws_id}/semanticModels")
+        models = response.json().get('value', [])
+        model = next((m for m in models if m.get('displayName') == model_name), None)
+
+        if not model:
+            raise ValueError(f"Semantic model '{model_name}' not found in workspace '{ws_name}'")
+
+        model_id = model.get('id')
+
+        # Get definition using Items API with TMSL format
+        definition_url = f"https://api.fabric.microsoft.com/v1/workspaces/{ws_id}/items/{model_id}/getDefinition"
+        headers = client._get_headers()
+        response = requests.post(f"{definition_url}?format=TMSL", headers=headers)
+        response.raise_for_status()
+
+        # Handle long-running operation
+        if response.status_code == 202:
+            operation_id = response.headers.get('x-ms-operation-id')
+            max_attempts = 30
+
+            for attempt in range(max_attempts):
+                time.sleep(2)
+
+                status_url = f"https://api.fabric.microsoft.com/v1/operations/{operation_id}"
+                status_response = requests.get(status_url, headers=headers)
+                status = status_response.json().get('status')
+
+                if status == 'Succeeded':
+                    result_url = f"https://api.fabric.microsoft.com/v1/operations/{operation_id}/result"
+                    result_response = requests.get(result_url, headers=headers)
+                    result_data = result_response.json()
+                    break
+                elif status == 'Failed':
+                    error = status_response.json().get('error', {})
+                    raise Exception(f"Download operation failed: {error.get('message')}")
+                elif attempt == max_attempts - 1:
+                    raise Exception("Download operation timed out")
+        else:
+            result_data = response.json()
+
+        # Extract BIM content
+        definition = result_data.get('definition', {})
+        parts = definition.get('parts', [])
+
+        bim_part = next((p for p in parts if p.get('path', '').endswith('.bim')), None)
+        if not bim_part:
+            raise Exception("No BIM file found in semantic model definition")
+
+        # Decode BIM
+        import base64
+        bim_payload = bim_part.get('payload', '')
+        bim_content_str = base64.b64decode(bim_payload).decode('utf-8')
+        bim_content = json.loads(bim_content_str)
+
+        print(f"✓ BIM downloaded from {ws_name}/{model_name}")
+    else:
+        raise ValueError(f"Invalid BIM source: '{url_or_path}'. Must be a valid file path, URL, or 'workspace/model' format.")
+
     print(f"   - Tables: {len(bim_content.get('model', {}).get('tables', []))}")
     print(f"   - Relationships: {len(bim_content.get('model', {}).get('relationships', []))}")
     return bim_content
```
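`download_bim_from_github` now accepts three source forms and returns the same dict-shaped BIM payload for each. A sketch of the three call shapes (file, URL, and workspace/model names are placeholders):

```python
from duckrun.semantic_model import download_bim_from_github

bim = download_bim_from_github("model.bim")                        # local file
bim = download_bim_from_github("https://example.com/model.bim")    # URL
bim = download_bim_from_github("My Workspace/Sales Model")         # workspace/model

print(len(bim.get("model", {}).get("tables", [])))  # a dict in every case
```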
```diff
@@ -299,23 +393,27 @@ def create_dataset_from_bim(dataset_name, bim_content, workspace_id, client):
     for attempt in range(max_attempts):
         time.sleep(2)
 
-        # Get operation result (not just status)
-        result_response = client.get(f"/v1/operations/{operation_id}/result")
-
         # Check if operation is complete by getting the status
         status_response = client.get(f"/v1/operations/{operation_id}")
         status = status_response.json().get('status')
 
         if status == 'Succeeded':
             print(f"✓ Operation completed")
-
-
-
-
-
-
-
-
+
+            # Now get the result (only after status is Succeeded)
+            try:
+                result_response = client.get(f"/v1/operations/{operation_id}/result")
+                result_data = result_response.json()
+                dataset_id = result_data.get('id')
+                if dataset_id:
+                    return dataset_id
+            except:
+                # If result endpoint fails, fallback to searching by name
+                pass
+
+            # Fallback: search for the dataset by name
+            return get_dataset_id(dataset_name, workspace_id, client)
+
         elif status == 'Failed':
             error = status_response.json().get('error', {})
             raise Exception(f"Operation failed: {error.get('message')}")
```
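The fix above fetches `/result` only after the operation reports `Succeeded`; the removed code requested the result on every poll iteration. The same poll-then-fetch pattern recurs in the download paths, so a distilled sketch may help (the `wait_for_operation` helper is hypothetical, not part of the package; `client` stands in for any object whose `get()` takes a relative Fabric API path):

```python
import time

def wait_for_operation(client, operation_id, max_attempts=30, interval=2):
    """Hypothetical helper: poll a Fabric long-running operation, then fetch its result."""
    for attempt in range(max_attempts):
        time.sleep(interval)
        status = client.get(f"/v1/operations/{operation_id}").json().get('status')
        if status == 'Succeeded':
            # Only now is /result expected to hold the payload
            return client.get(f"/v1/operations/{operation_id}/result").json()
        if status == 'Failed':
            raise RuntimeError("operation failed")
    raise TimeoutError("operation did not complete in time")
```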
```diff
@@ -333,7 +431,7 @@ def create_dataset_from_bim(dataset_name, bim_content, workspace_id, client):
 
 
 def deploy_semantic_model(workspace_name_or_id, lakehouse_name_or_id, schema_name, dataset_name,
-
+                          bim_url_or_path, wait_seconds=5):
     """
     Deploy a semantic model using DirectLake mode.
 
```
```diff
@@ -342,15 +440,20 @@ def deploy_semantic_model(workspace_name_or_id, lakehouse_name_or_id, schema_name,
         lakehouse_name_or_id: Name or GUID of the lakehouse
         schema_name: Schema name (e.g., 'dbo', 'staging')
         dataset_name: Name for the semantic model
-
+        bim_url_or_path: URL to the BIM file or local file path (e.g., 'model.bim' or 'https://...')
         wait_seconds: Seconds to wait before refresh (default: 5)
 
     Returns:
         1 for success, 0 for failure
 
     Examples:
+        # Using a URL
         dr = Duckrun.connect("My Workspace/My Lakehouse.lakehouse/dbo")
         dr.deploy("https://raw.githubusercontent.com/.../model.bim")
+
+        # Using a local file
+        dr.deploy("./my_model.bim")
+        dr.deploy("C:/path/to/model.bim")
     """
     print("=" * 70)
     print("Semantic Model Deployment (DirectLake)")
```
```diff
@@ -389,8 +492,8 @@ def deploy_semantic_model(workspace_name_or_id, lakehouse_name_or_id, schema_name,
     lakehouse_id = get_lakehouse_id(lakehouse_name_or_id, workspace_id, client)
 
     # Step 4: Download and update BIM
-    print("\n[Step 4/6]
-    bim_content = download_bim_from_github(
+    print("\n[Step 4/6] Loading and configuring BIM file...")
+    bim_content = download_bim_from_github(bim_url_or_path)
 
     modified_bim = update_bim_for_directlake(bim_content, workspace_id, lakehouse_id, schema_name)
     modified_bim['name'] = dataset_name
```
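With the new signature, `deploy_semantic_model` can also be called directly rather than through `Duckrun.deploy()`. A sketch under placeholder names:

```python
from duckrun.semantic_model import deploy_semantic_model

deploy_semantic_model(
    workspace_name_or_id="My Workspace",
    lakehouse_name_or_id="My Lakehouse",
    schema_name="dbo",
    dataset_name="Sales Model",
    bim_url_or_path="model.bim",  # URL or local path, per the updated docstring
    wait_seconds=5,
)
```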
```diff
@@ -432,3 +535,187 @@ def deploy_semantic_model(workspace_name_or_id, lakehouse_name_or_id, schema_name,
     print(f"   - Check tables are in Delta format")
     print("=" * 70)
     return 0
+
+
+def copy_model(ws_source, model_name, destination, new_model_name=None, wait_seconds=5):
+    """
+    Copy a semantic model from one workspace to another.
+
+    This is a convenience function that downloads a BIM file from a source workspace
+    and deploys it to a destination lakehouse in one operation.
+
+    Args:
+        ws_source: Source workspace name or GUID
+        model_name: Name of the semantic model to copy
+        destination: Destination in format "workspace/lakehouse.lakehouse/schema"
+        new_model_name: Name for the new semantic model (default: same as source)
+        wait_seconds: Seconds to wait before refresh (default: 5)
+
+    Returns:
+        1 for success, 0 for failure
+
+    Examples:
+        # Copy to same workspace, different lakehouse
+        copy_model("My Workspace", "Sales Model", "My Workspace/Target Lakehouse.lakehouse/dbo")
+
+        # Copy to different workspace with new name
+        copy_model("Source WS", "Production Model", "Target WS/Data Lake.lakehouse/analytics",
+                   new_model_name="Production Model - Copy")
+
+        # Using the connect pattern
+        import duckrun
+        duckrun.semantic_model.copy_model("Source", "Model", "Target/LH.lakehouse/dbo")
+    """
+    import tempfile
+    import os
+
+    print("=" * 70)
+    print("Semantic Model Copy Operation")
+    print("=" * 70)
+
+    try:
+        # Parse destination
+        parts = destination.split("/")
+        if len(parts) != 3:
+            raise ValueError(
+                f"Invalid destination format: '{destination}'. "
+                "Expected format: 'workspace/lakehouse.lakehouse/schema'"
+            )
+
+        ws_dest, lakehouse, schema = parts
+
+        # Remove .lakehouse suffix if present
+        if lakehouse.endswith(".lakehouse"):
+            lakehouse = lakehouse[:-10]
+
+        # Use source model name if new name not provided
+        if not new_model_name:
+            new_model_name = model_name
+
+        print(f"\nSource:")
+        print(f"  Workspace: {ws_source}")
+        print(f"  Model: {model_name}")
+        print(f"\nDestination:")
+        print(f"  Workspace: {ws_dest}")
+        print(f"  Lakehouse: {lakehouse}")
+        print(f"  Schema: {schema}")
+        print(f"  New Model Name: {new_model_name}")
+
+        # Step 1: Download BIM from source
+        print("\n" + "-" * 70)
+        print("[Step 1/2] Downloading BIM from source workspace...")
+        print("-" * 70)
+
+        client = FabricRestClient()
+        ws_source_id = get_workspace_id(ws_source, client)
+
+        # Use temporary file for BIM content
+        with tempfile.NamedTemporaryFile(mode='w', suffix='.bim', delete=False, encoding='utf-8') as tmp_file:
+            temp_bim_path = tmp_file.name
+
+            # Get semantic model ID
+            response = client.get(f"/v1/workspaces/{ws_source_id}/semanticModels")
+            models = response.json().get('value', [])
+            model = next((m for m in models if m.get('displayName') == model_name), None)
+
+            if not model:
+                raise ValueError(f"Semantic model '{model_name}' not found in workspace '{ws_source}'")
+
+            model_id = model.get('id')
+            print(f"✓ Found source model: {model_name} (ID: {model_id})")
+
+            # Get definition using Items API with TMSL format
+            print("  Downloading BIM definition...")
+            definition_url = f"https://api.fabric.microsoft.com/v1/workspaces/{ws_source_id}/items/{model_id}/getDefinition"
+            headers = client._get_headers()
+            response = requests.post(f"{definition_url}?format=TMSL", headers=headers)
+            response.raise_for_status()
+
+            # Handle long-running operation
+            if response.status_code == 202:
+                operation_id = response.headers.get('x-ms-operation-id')
+                max_attempts = 30
+
+                for attempt in range(max_attempts):
+                    time.sleep(2)
+
+                    status_url = f"https://api.fabric.microsoft.com/v1/operations/{operation_id}"
+                    status_response = requests.get(status_url, headers=headers)
+                    status = status_response.json().get('status')
+
+                    if status == 'Succeeded':
+                        result_url = f"https://api.fabric.microsoft.com/v1/operations/{operation_id}/result"
+                        result_response = requests.get(result_url, headers=headers)
+                        result_data = result_response.json()
+                        break
+                    elif status == 'Failed':
+                        error = status_response.json().get('error', {})
+                        raise Exception(f"Download operation failed: {error.get('message')}")
+                    elif attempt == max_attempts - 1:
+                        raise Exception("Download operation timed out")
+            else:
+                result_data = response.json()
+
+            # Extract BIM content
+            definition = result_data.get('definition', {})
+            parts = definition.get('parts', [])
+
+            bim_part = next((p for p in parts if p.get('path', '').endswith('.bim')), None)
+            if not bim_part:
+                raise Exception("No BIM file found in semantic model definition")
+
+            # Decode and save BIM
+            import base64
+            bim_payload = bim_part.get('payload', '')
+            bim_content = base64.b64decode(bim_payload).decode('utf-8')
+            bim_json = json.loads(bim_content)
+
+            # Write to temp file
+            json.dump(bim_json, tmp_file, indent=2)
+
+        print(f"✓ BIM downloaded successfully")
+        print(f"  - Tables: {len(bim_json.get('model', {}).get('tables', []))}")
+        print(f"  - Relationships: {len(bim_json.get('model', {}).get('relationships', []))}")
+
+        # Step 2: Deploy to destination
+        print("\n" + "-" * 70)
+        print("[Step 2/2] Deploying to destination workspace...")
+        print("-" * 70)
+
+        result = deploy_semantic_model(
+            workspace_name_or_id=ws_dest,
+            lakehouse_name_or_id=lakehouse,
+            schema_name=schema,
+            dataset_name=new_model_name,
+            bim_url_or_path=temp_bim_path,
+            wait_seconds=wait_seconds
+        )
+
+        # Clean up temp file
+        try:
+            os.unlink(temp_bim_path)
+        except:
+            pass
+
+        if result == 1:
+            print("\n" + "=" * 70)
+            print("🎉 Copy Operation Completed!")
+            print("=" * 70)
+            print(f"Source: {ws_source}/{model_name}")
+            print(f"Destination: {ws_dest}/{lakehouse}/{schema}/{new_model_name}")
+            print("=" * 70)
+
+        return result
+
+    except Exception as e:
+        print("\n" + "=" * 70)
+        print("❌ Copy Operation Failed")
+        print("=" * 70)
+        print(f"Error: {str(e)}")
+        print("\n💡 Troubleshooting:")
+        print(f"  - Verify source workspace '{ws_source}' and model '{model_name}' exist")
+        print(f"  - Verify destination workspace and lakehouse exist")
+        print(f"  - Ensure you have permissions for both workspaces")
+        print("=" * 70)
+        return 0
+
```
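`copy_model` wires the download and deploy halves together. A usage sketch with placeholder workspace and model names:

```python
from duckrun.semantic_model import copy_model

# Clone "Sales Model" into another workspace's lakehouse under a new name
copy_model(
    "Source WS", "Sales Model",
    "Target WS/Data Lake.lakehouse/analytics",
    new_model_name="Sales Model - Copy",
)
```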
duckrun/stats.py
CHANGED
```diff
@@ -4,6 +4,7 @@ Delta Lake table statistics functionality for duckrun
 import duckdb
 from deltalake import DeltaTable
 from datetime import datetime
+import pyarrow as pa
 
 
 def _table_exists(duckrun_instance, schema_name: str, table_name: str) -> bool:
```
```diff
@@ -149,17 +150,23 @@ def get_stats(duckrun_instance, source: str):
         dt = DeltaTable(table_path)
         add_actions = dt.get_add_actions(flatten=True)
 
-        # Convert to dict -
-        #
+        # Convert RecordBatch to dict - works with both PyArrow (deltalake 0.18.2) and arro3 (newer versions)
+        # Strategy: Use duck typing - try direct conversion first, then manual extraction
+        # This works because both PyArrow and arro3 RecordBatches have schema and column() methods
+
         try:
+            # Old deltalake (0.18.2): PyArrow RecordBatch has to_pydict() directly
             xx = add_actions.to_pydict()
         except AttributeError:
-            # New
-
-            if
-            #
-
+            # New deltalake with arro3: Use schema and column() methods
+            # This is the universal approach that works with both PyArrow and arro3
+            if hasattr(add_actions, 'schema') and hasattr(add_actions, 'column'):
+                # Extract columns manually and create PyArrow table
+                arrow_table = pa.table({name: add_actions.column(name) for name in add_actions.schema.names})
+                xx = arrow_table.to_pydict()
             else:
+                # Fallback: empty dict (shouldn't happen)
+                print(f"Warning: Could not convert RecordBatch for table '{tbl}': Unexpected type {type(add_actions)}")
                 xx = {}
 
         # Check if VORDER exists
```
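The duck-typing fallback above can be exercised with plain PyArrow, since a PyArrow `RecordBatch` supports both paths; an arro3 batch would take the second path because it lacks `to_pydict()`. A self-contained sketch (column names are illustrative, not the real add-action schema):

```python
import pyarrow as pa

batch = pa.RecordBatch.from_pydict({"size_bytes": [100, 250], "num_records": [10, 25]})

# Path 1: PyArrow exposes to_pydict() directly
print(batch.to_pydict())

# Path 2: rebuild via schema names and column(), as in the fallback branch
table = pa.table({name: batch.column(name) for name in batch.schema.names})
print(table.to_pydict())
```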
{duckrun-0.2.9.dev5.dist-info → duckrun-0.2.10.dist-info}/METADATA
CHANGED
```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: duckrun
-Version: 0.2.9.dev5
+Version: 0.2.10
 Summary: Lakehouse task runner powered by DuckDB for Microsoft Fabric
 Author: mim
 License: MIT
```
````diff
@@ -420,6 +420,37 @@ success = con.run(pipeline)  # Returns True only if ALL tasks succeed
 
 This prevents downstream tasks from processing incomplete or corrupted data.
 
+### Semantic Model Deployment
+
+Deploy Power BI semantic models directly from BIM files using DirectLake mode:
+
+```python
+# Connect to lakehouse
+con = duckrun.connect("Analytics/Sales.lakehouse/dbo")
+
+# Deploy with auto-generated name (lakehouse_schema)
+con.deploy("https://raw.githubusercontent.com/user/repo/main/model.bim")
+
+# Deploy with custom name
+con.deploy(
+    "https://raw.githubusercontent.com/user/repo/main/sales_model.bim",
+    dataset_name="Sales Analytics Model",
+    wait_seconds=10  # Wait for permission propagation
+)
+```
+
+**Features:**
+- 🚀 **DirectLake Mode**: Deploys semantic models with DirectLake connection
+- 🔄 **Automatic Configuration**: Auto-configures workspace, lakehouse, and schema connections
+- 📦 **BIM from URL**: Load model definitions from GitHub or any accessible URL
+- ⏱️ **Permission Handling**: Configurable wait time for permission propagation
+
+**Use Cases:**
+- Deploy semantic models as part of CI/CD pipelines
+- Version control your semantic models in Git
+- Automated model deployment across environments
+- Streamline DirectLake model creation
+
 ### Delta Lake Optimization
 
 Duckrun automatically:
````
@@ -534,6 +565,12 @@ con.sql("""
|
|
534
565
|
|
535
566
|
# 5. Download processed files for external systems
|
536
567
|
con.download("processed_reports", "./exports", ['.csv'])
|
568
|
+
|
569
|
+
# 6. Deploy semantic model for Power BI
|
570
|
+
con.deploy(
|
571
|
+
"https://raw.githubusercontent.com/user/repo/main/sales_model.bim",
|
572
|
+
dataset_name="Sales Analytics"
|
573
|
+
)
|
537
574
|
```
|
538
575
|
|
539
576
|
**This example demonstrates:**
|
@@ -541,8 +578,9 @@ con.download("processed_reports", "./exports", ['.csv'])
|
|
541
578
|
- 🔄 **Pipeline orchestration** with SQL and Python tasks
|
542
579
|
- ⚡ **Fast data exploration** with DuckDB
|
543
580
|
- 💾 **Delta table creation** with Spark-style API
|
544
|
-
-
|
545
|
-
-
|
581
|
+
- 🔀 **Schema evolution** and partitioning
|
582
|
+
- 📤 **File downloads** from OneLake Files
|
583
|
+
- 📊 **Semantic model deployment** with DirectLake
|
546
584
|
|
547
585
|
## Schema Evolution & Partitioning Guide
|
548
586
|
|
duckrun-0.2.10.dist-info/RECORD
ADDED
```diff
@@ -0,0 +1,14 @@
+duckrun/__init__.py,sha256=cTj6KQ6hKmgu1z7k9nhDcO5lct049luxjx1V0QnymCo,235
+duckrun/auth.py,sha256=qPaLQ7InlV9leA9r6E6VEeYavFFoBi0zSN8m_l1aoQs,9545
+duckrun/core.py,sha256=g9WtvhROxFSo2Idb979fY5HhxbMm_x-tajc_zWMtqCU,46853
+duckrun/files.py,sha256=Fvdjg3DyHJzIVzKo8M_j-eGz4zU61lOB38Y_onbQJkI,10137
+duckrun/lakehouse.py,sha256=j--Z3zo8AOWt1GF9VzRosmmTAy6ey2D0LVubti58twU,14109
+duckrun/runner.py,sha256=yrDxfy1RVkb8iK9GKGmIFZHzCvcO_0GVQlbng7Vw_iM,14171
+duckrun/semantic_model.py,sha256=obzlN2-dbEW3JmDop-vrZGGGLi9u3ThhTbgtDjou7uY,29509
+duckrun/stats.py,sha256=oKIjZ7u5cFVT63FuOl5UqoDsOG3098woSCn-uI6i_sQ,11084
+duckrun/writer.py,sha256=svUuPCYOhrz299NgnpTKhARKjfej0PxnoND2iPDSypk,8098
+duckrun-0.2.10.dist-info/licenses/LICENSE,sha256=-DeQQwdbCbkB4507ZF3QbocysB-EIjDtaLexvqRkGZc,1083
+duckrun-0.2.10.dist-info/METADATA,sha256=CwDyjJqyfBoISxZ1bfdojVVsP0HcrLylgqCTpMsC6e8,20624
+duckrun-0.2.10.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+duckrun-0.2.10.dist-info/top_level.txt,sha256=BknMEwebbUHrVAp3SC92ps8MPhK7XSYsaogTvi_DmEU,8
+duckrun-0.2.10.dist-info/RECORD,,
```
duckrun-0.2.9.dev5.dist-info/RECORD
REMOVED
```diff
@@ -1,14 +0,0 @@
-duckrun/__init__.py,sha256=cTj6KQ6hKmgu1z7k9nhDcO5lct049luxjx1V0QnymCo,235
-duckrun/auth.py,sha256=qPaLQ7InlV9leA9r6E6VEeYavFFoBi0zSN8m_l1aoQs,9545
-duckrun/core.py,sha256=CrWMgA1QHvVF2AAlTlBlQ7VfKsuakcqZa4VuX2WJmik,39279
-duckrun/files.py,sha256=Fvdjg3DyHJzIVzKo8M_j-eGz4zU61lOB38Y_onbQJkI,10137
-duckrun/lakehouse.py,sha256=j--Z3zo8AOWt1GF9VzRosmmTAy6ey2D0LVubti58twU,14109
-duckrun/runner.py,sha256=yrDxfy1RVkb8iK9GKGmIFZHzCvcO_0GVQlbng7Vw_iM,14171
-duckrun/semantic_model.py,sha256=jmTrS15WmhU3rQfdpLII1wm3EORdQfqQxOhqOSyXB_w,17305
-duckrun/stats.py,sha256=CXfb2DWF3PgOckelJooU0y-BAsNT9NFDfDYEmo0mUQQ,10473
-duckrun/writer.py,sha256=svUuPCYOhrz299NgnpTKhARKjfej0PxnoND2iPDSypk,8098
-duckrun-0.2.9.dev5.dist-info/licenses/LICENSE,sha256=-DeQQwdbCbkB4507ZF3QbocysB-EIjDtaLexvqRkGZc,1083
-duckrun-0.2.9.dev5.dist-info/METADATA,sha256=r0oG0-EI5oE-UgfRWJskXHHc4u7uNhFrdPFsTrCtGwc,19277
-duckrun-0.2.9.dev5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-duckrun-0.2.9.dev5.dist-info/top_level.txt,sha256=BknMEwebbUHrVAp3SC92ps8MPhK7XSYsaogTvi_DmEU,8
-duckrun-0.2.9.dev5.dist-info/RECORD,,
```
{duckrun-0.2.9.dev5.dist-info → duckrun-0.2.10.dist-info}/WHEEL
File without changes

{duckrun-0.2.9.dev5.dist-info → duckrun-0.2.10.dist-info}/licenses/LICENSE
File without changes

{duckrun-0.2.9.dev5.dist-info → duckrun-0.2.10.dist-info}/top_level.txt
File without changes