duckrun 0.2.9.dev4__py3-none-any.whl → 0.2.10__py3-none-any.whl
This diff compares two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- duckrun/__init__.py +1 -1
- duckrun/core.py +162 -8
- duckrun/semantic_model.py +342 -48
- duckrun/stats.py +14 -7
- {duckrun-0.2.9.dev4.dist-info → duckrun-0.2.10.dist-info}/METADATA +41 -3
- duckrun-0.2.10.dist-info/RECORD +14 -0
- duckrun-0.2.9.dev4.dist-info/RECORD +0 -14
- {duckrun-0.2.9.dev4.dist-info → duckrun-0.2.10.dist-info}/WHEEL +0 -0
- {duckrun-0.2.9.dev4.dist-info → duckrun-0.2.10.dist-info}/licenses/LICENSE +0 -0
- {duckrun-0.2.9.dev4.dist-info → duckrun-0.2.10.dist-info}/top_level.txt +0 -0
duckrun/__init__.py
CHANGED
duckrun/core.py
CHANGED
@@ -2,6 +2,8 @@ import duckdb
 import requests
 import os
 import importlib.util
+import json
+import time
 from deltalake import DeltaTable, write_deltalake
 from typing import List, Tuple, Union, Optional, Callable, Dict, Any
 from string import Template
@@ -702,8 +704,11 @@ class Duckrun:
         Deploy a semantic model from a BIM file using DirectLake mode.
 
         Args:
-            bim_url: …
-            …
+            bim_url: Can be:
+                - URL: "https://raw.githubusercontent.com/.../model.bim"
+                - Local file: "model.bim"
+                - Workspace/Model: "workspace_name/model_name"
+            dataset_name: Name for the semantic model (default: source model name if workspace/model format, else lakehouse_schema)
             wait_seconds: Seconds to wait for permission propagation (default: 5)
 
         Returns:
@@ -712,18 +717,28 @@ class Duckrun:
         Examples:
             dr = Duckrun.connect("My Workspace/My Lakehouse.lakehouse/dbo")
 
-            # Deploy
-            dr.deploy("…
+            # Deploy from workspace/model (uses same name by default)
+            dr.deploy("Source Workspace/Source Model")  # Creates "Source Model"
 
             # Deploy with custom name
-            dr.deploy("…
-            …
+            dr.deploy("Source Workspace/Source Model", dataset_name="Sales Model Copy")
+
+            # Deploy from URL or local file
+            dr.deploy("https://raw.githubusercontent.com/.../model.bim", dataset_name="My Model")
         """
         from .semantic_model import deploy_semantic_model
 
         # Auto-generate dataset name if not provided
         if dataset_name is None:
-            …
+            # If using workspace/model format, use the model name
+            if "/" in bim_url and not bim_url.startswith(('http://', 'https://')):
+                parts = bim_url.split("/")
+                if len(parts) == 2:
+                    dataset_name = parts[1]  # Use the model name
+                else:
+                    dataset_name = f"{self.lakehouse_name}_{self.schema}"
+            else:
+                dataset_name = f"{self.lakehouse_name}_{self.schema}"
 
         # Call the deployment function (DirectLake only)
         return deploy_semantic_model(
@@ -731,7 +746,7 @@ class Duckrun:
             lakehouse_name_or_id=self.lakehouse_name,
             schema_name=self.schema,
             dataset_name=dataset_name,
-            …
+            bim_url_or_path=bim_url,
             wait_seconds=wait_seconds
         )
 
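Taken together, these `deploy()` changes add a workspace/model source form and a smarter `dataset_name` default. A minimal usage sketch based on the docstring above; all workspace, lakehouse, and model names are illustrative:

```python
import duckrun

# Connect to the destination lakehouse (names are hypothetical)
dr = duckrun.connect("Target Workspace/Target Lakehouse.lakehouse/dbo")

# "workspace/model" source: dataset_name defaults to the source model name
dr.deploy("Source Workspace/Source Model")          # creates "Source Model"

# URL or local-file source: dataset_name defaults to "<lakehouse>_<schema>"
dr.deploy("model.bim")                              # creates "Target Lakehouse_dbo"
dr.deploy("model.bim", dataset_name="Sales Model")  # explicit override
```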
@@ -864,6 +879,145 @@ class WorkspaceConnection:
             print(f"❌ Error creating lakehouse '{lakehouse_name}': {e}")
             return False
 
+    def download_bim(self, semantic_model_name: str, output_path: Optional[str] = None) -> Optional[str]:
+        """
+        Download a semantic model as a BIM (Business Intelligence Model) file.
+
+        Args:
+            semantic_model_name: Name of the semantic model to download
+            output_path: Optional path to save the BIM file. If not provided, returns the BIM content as JSON string
+
+        Returns:
+            BIM content as JSON string if output_path is None, or the file path if saved successfully
+
+        Example:
+            con = duckrun.connect("My Workspace")
+            # Get BIM content as string
+            bim_content = con.download_bim("Sales Model")
+            # Or save to file
+            con.download_bim("Sales Model", "sales_model.bim")
+        """
+        try:
+            # Get authentication token
+            from .auth import get_fabric_api_token
+            token = get_fabric_api_token()
+            if not token:
+                print("❌ Failed to authenticate for downloading semantic model")
+                return None
+
+            # Resolve workspace name to ID
+            workspace_id = self._get_workspace_id_by_name(token, self.workspace_name)
+            if not workspace_id:
+                print(f"❌ Workspace '{self.workspace_name}' not found")
+                return None
+
+            # Get semantic model ID
+            print(f"🔍 Looking for semantic model '{semantic_model_name}'...")
+            url = f"https://api.fabric.microsoft.com/v1/workspaces/{workspace_id}/semanticModels"
+            headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
+
+            response = requests.get(url, headers=headers)
+            response.raise_for_status()
+
+            models = response.json().get("value", [])
+            model = next((m for m in models if m.get("displayName") == semantic_model_name), None)
+
+            if not model:
+                print(f"❌ Semantic model '{semantic_model_name}' not found in workspace '{self.workspace_name}'")
+                return None
+
+            model_id = model.get("id")
+            print(f"✓ Found semantic model: {semantic_model_name} (ID: {model_id})")
+
+            # Get the model definition using the generic items API
+            print("📥 Downloading BIM definition...")
+            definition_url = f"https://api.fabric.microsoft.com/v1/workspaces/{workspace_id}/items/{model_id}/getDefinition"
+
+            # POST request to get definition with TMSL format (which includes model.bim)
+            # Note: format parameter should be in query string, not body
+            response = requests.post(f"{definition_url}?format=TMSL", headers=headers)
+            response.raise_for_status()
+
+            # Handle long-running operation if needed
+            if response.status_code == 202:
+                operation_id = response.headers.get('x-ms-operation-id')
+                print(f"  Waiting for operation to complete...")
+
+                max_attempts = 30
+                for attempt in range(max_attempts):
+                    time.sleep(2)
+
+                    # Get operation result
+                    result_url = f"https://api.fabric.microsoft.com/v1/operations/{operation_id}/result"
+                    result_response = requests.get(result_url, headers=headers)
+
+                    # Check operation status
+                    status_url = f"https://api.fabric.microsoft.com/v1/operations/{operation_id}"
+                    status_response = requests.get(status_url, headers=headers)
+                    status = status_response.json().get('status')
+
+                    if status == 'Succeeded':
+                        result_data = result_response.json()
+                        break
+                    elif status == 'Failed':
+                        error = status_response.json().get('error', {})
+                        print(f"❌ Operation failed: {error.get('message')}")
+                        return None
+                    elif attempt == max_attempts - 1:
+                        print("❌ Operation timed out")
+                        return None
+            else:
+                result_data = response.json()
+
+            # Extract BIM content from definition
+            definition = result_data.get('definition', {})
+            parts = definition.get('parts', [])
+
+            # Debug: show what parts we have
+            if not parts:
+                print("❌ No definition parts found in response")
+                print(f"  Result data keys: {list(result_data.keys())}")
+                print(f"  Definition keys: {list(definition.keys()) if definition else 'None'}")
+                return None
+
+            print(f"  Found {len(parts)} definition parts:")
+            for part in parts:
+                print(f"  - {part.get('path', 'unknown')}")
+
+            bim_part = next((p for p in parts if p.get('path', '').endswith('.bim')), None)
+            if not bim_part:
+                print("❌ No BIM file found in semantic model definition")
+                print(f"  Looking for files ending with '.bim', found: {[p.get('path') for p in parts]}")
+                return None
+
+            # Decode the BIM content (it's base64 encoded)
+            import base64
+            bim_payload = bim_part.get('payload', '')
+            bim_content = base64.b64decode(bim_payload).decode('utf-8')
+            bim_json = json.loads(bim_content)
+
+            # Format as pretty JSON
+            bim_formatted = json.dumps(bim_json, indent=2)
+
+            print(f"✓ BIM file downloaded successfully")
+            print(f"   - Tables: {len(bim_json.get('model', {}).get('tables', []))}")
+            print(f"   - Relationships: {len(bim_json.get('model', {}).get('relationships', []))}")
+
+            # Save to file or return content
+            if output_path:
+                with open(output_path, 'w', encoding='utf-8') as f:
+                    f.write(bim_formatted)
+                print(f"✓ Saved to: {output_path}")
+                return output_path
+            else:
+                return bim_formatted
+
+        except Exception as e:
+            print(f"❌ Error downloading semantic model: {e}")
+            import traceback
+            traceback.print_exc()
+            return None
+
     def _get_workspace_id_by_name(self, token: str, workspace_name: str) -> Optional[str]:
         """Helper method to get workspace ID from name"""
         try:
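The docstring above already shows the intended call pattern; here it is as a self-contained sketch (workspace and model names are placeholders):

```python
import duckrun

# Workspace-level connection; no lakehouse path is needed for download_bim
con = duckrun.connect("My Workspace")

# Return the BIM as a pretty-printed JSON string...
bim_text = con.download_bim("Sales Model")

# ...or write it straight to disk, e.g. for version control
con.download_bim("Sales Model", "sales_model.bim")
```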
duckrun/semantic_model.py
CHANGED
@@ -130,7 +130,7 @@ def check_dataset_exists(dataset_name, workspace_id, client):
 
 
 def refresh_dataset(dataset_name, workspace_id, client, dataset_id=None):
-    """Refresh a dataset and monitor progress"""
+    """Refresh a dataset and monitor progress using Power BI API"""
 
     # If dataset_id not provided, look it up by name
     if not dataset_id:
@@ -144,48 +144,149 @@ def refresh_dataset(dataset_name, workspace_id, client, dataset_id=None):
         "objects": []
     }
 
-    …
-    …
-    …
-    …
+    # Use Power BI API for refresh (not Fabric API)
+    powerbi_url = f"https://api.powerbi.com/v1.0/myorg/datasets/{dataset_id}/refreshes"
+    headers = client._get_headers()
+
+    response = requests.post(powerbi_url, headers=headers, json=payload)
 
     if response.status_code in [200, 202]:
         print(f"✓ Refresh initiated")
 
-        refresh_id …
-        if …
-        …
-        …
+        # For 202, get the refresh_id from the Location header
+        if response.status_code == 202:
+            location = response.headers.get('Location')
+            if location:
+                refresh_id = location.split('/')[-1]
+                print("  Monitoring refresh progress...")
+                max_attempts = 60
+                for attempt in range(max_attempts):
+                    time.sleep(5)
+
+                    # Check refresh status using Power BI API
+                    status_url = f"https://api.powerbi.com/v1.0/myorg/datasets/{dataset_id}/refreshes/{refresh_id}"
+                    status_response = requests.get(status_url, headers=headers)
+                    status_response.raise_for_status()
+                    status = status_response.json().get('status')
+
+                    if status == 'Completed':
+                        print(f"✓ Refresh completed successfully")
+                        return
+                    elif status == 'Failed':
+                        error = status_response.json().get('serviceExceptionJson', '')
+                        raise Exception(f"Refresh failed: {error}")
+                    elif status == 'Cancelled':
+                        raise Exception("Refresh was cancelled")
+
+                    if attempt % 6 == 0:
+                        print(f"  Status: {status}...")
+
+                raise Exception(f"Refresh timed out")
+    else:
+        response.raise_for_status()
+
+
+def download_bim_from_github(url_or_path):
+    """
+    Load BIM file from URL, local file path, or workspace/model format.
+
+    Args:
+        url_or_path: Can be:
+            - Local file path: "model.bim"
+            - URL: "https://..."
+            - Workspace/Model: "workspace_name/semantic_model_name"
+
+    Returns:
+        BIM content as dictionary
+    """
+    import os
+    import tempfile
+
+    # Check if it's a local file path
+    if os.path.exists(url_or_path):
+        print(f"Loading BIM file from local path...")
+        with open(url_or_path, 'r', encoding='utf-8') as f:
+            bim_content = json.load(f)
+        print(f"✓ BIM file loaded from: {url_or_path}")
+    # Check if it's a URL
+    elif url_or_path.startswith(('http://', 'https://')):
+        print(f"Downloading BIM file from URL...")
+        response = requests.get(url_or_path)
+        response.raise_for_status()
+        bim_content = response.json()
+        print(f"✓ BIM file downloaded from URL")
+    # Check if it's workspace/model format
+    elif "/" in url_or_path and not os.path.exists(url_or_path):
+        print(f"Downloading BIM from workspace/model...")
+        parts = url_or_path.split("/")
+        if len(parts) != 2:
+            raise ValueError(f"Invalid workspace/model format: '{url_or_path}'. Expected: 'workspace_name/model_name'")
+
+        ws_name, model_name = parts
+
+        # Download BIM from the semantic model
+        client = FabricRestClient()
+        ws_id = get_workspace_id(ws_name, client)
+
+        # Get semantic model ID
+        response = client.get(f"/v1/workspaces/{ws_id}/semanticModels")
+        models = response.json().get('value', [])
+        model = next((m for m in models if m.get('displayName') == model_name), None)
+
+        if not model:
+            raise ValueError(f"Semantic model '{model_name}' not found in workspace '{ws_name}'")
+
+        model_id = model.get('id')
+
+        # Get definition using Items API with TMSL format
+        definition_url = f"https://api.fabric.microsoft.com/v1/workspaces/{ws_id}/items/{model_id}/getDefinition"
+        headers = client._get_headers()
+        response = requests.post(f"{definition_url}?format=TMSL", headers=headers)
+        response.raise_for_status()
+
+        # Handle long-running operation
+        if response.status_code == 202:
+            operation_id = response.headers.get('x-ms-operation-id')
+            max_attempts = 30
+
             for attempt in range(max_attempts):
-                time.sleep(…
+                time.sleep(2)
 
-                …
-                …
-                …)
+                status_url = f"https://api.fabric.microsoft.com/v1/operations/{operation_id}"
+                status_response = requests.get(status_url, headers=headers)
                 status = status_response.json().get('status')
 
-                if status == '…
-                …
-                …
+                if status == 'Succeeded':
+                    result_url = f"https://api.fabric.microsoft.com/v1/operations/{operation_id}/result"
+                    result_response = requests.get(result_url, headers=headers)
+                    result_data = result_response.json()
+                    break
                 elif status == 'Failed':
                     error = status_response.json().get('error', {})
-                    raise Exception(f"…
-                elif …
-                    raise Exception("…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
+                    raise Exception(f"Download operation failed: {error.get('message')}")
+                elif attempt == max_attempts - 1:
+                    raise Exception("Download operation timed out")
+        else:
+            result_data = response.json()
+
+        # Extract BIM content
+        definition = result_data.get('definition', {})
+        parts = definition.get('parts', [])
+
+        bim_part = next((p for p in parts if p.get('path', '').endswith('.bim')), None)
+        if not bim_part:
+            raise Exception("No BIM file found in semantic model definition")
+
+        # Decode BIM
+        import base64
+        bim_payload = bim_part.get('payload', '')
+        bim_content_str = base64.b64decode(bim_payload).decode('utf-8')
+        bim_content = json.loads(bim_content_str)
+
+        print(f"✓ BIM downloaded from {ws_name}/{model_name}")
+    else:
+        raise ValueError(f"Invalid BIM source: '{url_or_path}'. Must be a valid file path, URL, or 'workspace/model' format.")
+
     print(f"   - Tables: {len(bim_content.get('model', {}).get('tables', []))}")
     print(f"   - Relationships: {len(bim_content.get('model', {}).get('relationships', []))}")
     return bim_content
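A quick sketch of the three source forms `download_bim_from_github` now accepts, per its docstring; the URL and names below are placeholders:

```python
from duckrun.semantic_model import download_bim_from_github

# Each source form resolves to the same parsed BIM dictionary
bim = download_bim_from_github("model.bim")                      # local file
bim = download_bim_from_github("https://example.com/model.bim")  # URL (illustrative)
bim = download_bim_from_github("Source Workspace/Source Model")  # workspace/model

print(len(bim.get("model", {}).get("tables", [])))
```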
@@ -292,23 +393,27 @@ def create_dataset_from_bim(dataset_name, bim_content, workspace_id, client):
     for attempt in range(max_attempts):
         time.sleep(2)
 
-        # Get operation result (not just status)
-        result_response = client.get(f"/v1/operations/{operation_id}/result")
-
         # Check if operation is complete by getting the status
         status_response = client.get(f"/v1/operations/{operation_id}")
         status = status_response.json().get('status')
 
         if status == 'Succeeded':
             print(f"✓ Operation completed")
-            …
-            …
-            …
-            …
-            …
-            …
-            …
-            …
+
+            # Now get the result (only after status is Succeeded)
+            try:
+                result_response = client.get(f"/v1/operations/{operation_id}/result")
+                result_data = result_response.json()
+                dataset_id = result_data.get('id')
+                if dataset_id:
+                    return dataset_id
+            except:
+                # If result endpoint fails, fallback to searching by name
+                pass
+
+            # Fallback: search for the dataset by name
+            return get_dataset_id(dataset_name, workspace_id, client)
+
         elif status == 'Failed':
             error = status_response.json().get('error', {})
             raise Exception(f"Operation failed: {error.get('message')}")
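The reordering above follows a pattern that recurs throughout this release: poll the Fabric operations endpoint until the status is `Succeeded`, and only then fetch `/result`. A generic sketch of that pattern; the helper name is ours, not part of duckrun's API:

```python
import time
import requests

def wait_for_fabric_operation(operation_id, headers, poll_seconds=2, max_attempts=30):
    """Hypothetical helper distilling the polling loops used in this release:
    check status first, and request /result only once the operation succeeded."""
    base = "https://api.fabric.microsoft.com/v1/operations"
    for attempt in range(max_attempts):
        time.sleep(poll_seconds)
        status = requests.get(f"{base}/{operation_id}", headers=headers).json().get("status")
        if status == "Succeeded":
            return requests.get(f"{base}/{operation_id}/result", headers=headers).json()
        if status == "Failed":
            raise RuntimeError("Operation failed")
    raise TimeoutError("Operation timed out")
```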
@@ -326,7 +431,7 @@ def create_dataset_from_bim(dataset_name, bim_content, workspace_id, client):
 
 
 def deploy_semantic_model(workspace_name_or_id, lakehouse_name_or_id, schema_name, dataset_name,
-                          …
+                          bim_url_or_path, wait_seconds=5):
     """
     Deploy a semantic model using DirectLake mode.
 
@@ -335,15 +440,20 @@ def deploy_semantic_model(workspace_name_or_id, lakehouse_name_or_id, schema_name,
         lakehouse_name_or_id: Name or GUID of the lakehouse
         schema_name: Schema name (e.g., 'dbo', 'staging')
         dataset_name: Name for the semantic model
-        …
+        bim_url_or_path: URL to the BIM file or local file path (e.g., 'model.bim' or 'https://...')
         wait_seconds: Seconds to wait before refresh (default: 5)
 
     Returns:
         1 for success, 0 for failure
 
     Examples:
+        # Using a URL
         dr = Duckrun.connect("My Workspace/My Lakehouse.lakehouse/dbo")
         dr.deploy("https://raw.githubusercontent.com/.../model.bim")
+
+        # Using a local file
+        dr.deploy("./my_model.bim")
+        dr.deploy("C:/path/to/model.bim")
     """
     print("=" * 70)
     print("Semantic Model Deployment (DirectLake)")
@@ -382,8 +492,8 @@ def deploy_semantic_model(workspace_name_or_id, lakehouse_name_or_id, schema_name,
     lakehouse_id = get_lakehouse_id(lakehouse_name_or_id, workspace_id, client)
 
     # Step 4: Download and update BIM
-    print("\n[Step 4/6] …
-    bim_content = download_bim_from_github(…
+    print("\n[Step 4/6] Loading and configuring BIM file...")
+    bim_content = download_bim_from_github(bim_url_or_path)
 
     modified_bim = update_bim_for_directlake(bim_content, workspace_id, lakehouse_id, schema_name)
     modified_bim['name'] = dataset_name
@@ -425,3 +535,187 @@ def deploy_semantic_model(workspace_name_or_id, lakehouse_name_or_id, schema_name,
     print(f"   - Check tables are in Delta format")
     print("=" * 70)
     return 0
+
+
+def copy_model(ws_source, model_name, destination, new_model_name=None, wait_seconds=5):
+    """
+    Copy a semantic model from one workspace to another.
+
+    This is a convenience function that downloads a BIM file from a source workspace
+    and deploys it to a destination lakehouse in one operation.
+
+    Args:
+        ws_source: Source workspace name or GUID
+        model_name: Name of the semantic model to copy
+        destination: Destination in format "workspace/lakehouse.lakehouse/schema"
+        new_model_name: Name for the new semantic model (default: same as source)
+        wait_seconds: Seconds to wait before refresh (default: 5)
+
+    Returns:
+        1 for success, 0 for failure
+
+    Examples:
+        # Copy to same workspace, different lakehouse
+        copy_model("My Workspace", "Sales Model", "My Workspace/Target Lakehouse.lakehouse/dbo")
+
+        # Copy to different workspace with new name
+        copy_model("Source WS", "Production Model", "Target WS/Data Lake.lakehouse/analytics",
+                   new_model_name="Production Model - Copy")
+
+        # Using the connect pattern
+        import duckrun
+        duckrun.semantic_model.copy_model("Source", "Model", "Target/LH.lakehouse/dbo")
+    """
+    import tempfile
+    import os
+
+    print("=" * 70)
+    print("Semantic Model Copy Operation")
+    print("=" * 70)
+
+    try:
+        # Parse destination
+        parts = destination.split("/")
+        if len(parts) != 3:
+            raise ValueError(
+                f"Invalid destination format: '{destination}'. "
+                "Expected format: 'workspace/lakehouse.lakehouse/schema'"
+            )
+
+        ws_dest, lakehouse, schema = parts
+
+        # Remove .lakehouse suffix if present
+        if lakehouse.endswith(".lakehouse"):
+            lakehouse = lakehouse[:-10]
+
+        # Use source model name if new name not provided
+        if not new_model_name:
+            new_model_name = model_name
+
+        print(f"\nSource:")
+        print(f"  Workspace: {ws_source}")
+        print(f"  Model: {model_name}")
+        print(f"\nDestination:")
+        print(f"  Workspace: {ws_dest}")
+        print(f"  Lakehouse: {lakehouse}")
+        print(f"  Schema: {schema}")
+        print(f"  New Model Name: {new_model_name}")
+
+        # Step 1: Download BIM from source
+        print("\n" + "-" * 70)
+        print("[Step 1/2] Downloading BIM from source workspace...")
+        print("-" * 70)
+
+        client = FabricRestClient()
+        ws_source_id = get_workspace_id(ws_source, client)
+
+        # Use temporary file for BIM content
+        with tempfile.NamedTemporaryFile(mode='w', suffix='.bim', delete=False, encoding='utf-8') as tmp_file:
+            temp_bim_path = tmp_file.name
+
+            # Get semantic model ID
+            response = client.get(f"/v1/workspaces/{ws_source_id}/semanticModels")
+            models = response.json().get('value', [])
+            model = next((m for m in models if m.get('displayName') == model_name), None)
+
+            if not model:
+                raise ValueError(f"Semantic model '{model_name}' not found in workspace '{ws_source}'")
+
+            model_id = model.get('id')
+            print(f"✓ Found source model: {model_name} (ID: {model_id})")
+
+            # Get definition using Items API with TMSL format
+            print("  Downloading BIM definition...")
+            definition_url = f"https://api.fabric.microsoft.com/v1/workspaces/{ws_source_id}/items/{model_id}/getDefinition"
+            headers = client._get_headers()
+            response = requests.post(f"{definition_url}?format=TMSL", headers=headers)
+            response.raise_for_status()
+
+            # Handle long-running operation
+            if response.status_code == 202:
+                operation_id = response.headers.get('x-ms-operation-id')
+                max_attempts = 30
+
+                for attempt in range(max_attempts):
+                    time.sleep(2)
+
+                    status_url = f"https://api.fabric.microsoft.com/v1/operations/{operation_id}"
+                    status_response = requests.get(status_url, headers=headers)
+                    status = status_response.json().get('status')
+
+                    if status == 'Succeeded':
+                        result_url = f"https://api.fabric.microsoft.com/v1/operations/{operation_id}/result"
+                        result_response = requests.get(result_url, headers=headers)
+                        result_data = result_response.json()
+                        break
+                    elif status == 'Failed':
+                        error = status_response.json().get('error', {})
+                        raise Exception(f"Download operation failed: {error.get('message')}")
+                    elif attempt == max_attempts - 1:
+                        raise Exception("Download operation timed out")
+            else:
+                result_data = response.json()
+
+            # Extract BIM content
+            definition = result_data.get('definition', {})
+            parts = definition.get('parts', [])
+
+            bim_part = next((p for p in parts if p.get('path', '').endswith('.bim')), None)
+            if not bim_part:
+                raise Exception("No BIM file found in semantic model definition")
+
+            # Decode and save BIM
+            import base64
+            bim_payload = bim_part.get('payload', '')
+            bim_content = base64.b64decode(bim_payload).decode('utf-8')
+            bim_json = json.loads(bim_content)
+
+            # Write to temp file
+            json.dump(bim_json, tmp_file, indent=2)
+
+        print(f"✓ BIM downloaded successfully")
+        print(f"   - Tables: {len(bim_json.get('model', {}).get('tables', []))}")
+        print(f"   - Relationships: {len(bim_json.get('model', {}).get('relationships', []))}")
+
+        # Step 2: Deploy to destination
+        print("\n" + "-" * 70)
+        print("[Step 2/2] Deploying to destination workspace...")
+        print("-" * 70)
+
+        result = deploy_semantic_model(
+            workspace_name_or_id=ws_dest,
+            lakehouse_name_or_id=lakehouse,
+            schema_name=schema,
+            dataset_name=new_model_name,
+            bim_url_or_path=temp_bim_path,
+            wait_seconds=wait_seconds
+        )
+
+        # Clean up temp file
+        try:
+            os.unlink(temp_bim_path)
+        except:
+            pass
+
+        if result == 1:
+            print("\n" + "=" * 70)
+            print("🎉 Copy Operation Completed!")
+            print("=" * 70)
+            print(f"Source: {ws_source}/{model_name}")
+            print(f"Destination: {ws_dest}/{lakehouse}/{schema}/{new_model_name}")
+            print("=" * 70)
+
+        return result
+
+    except Exception as e:
+        print("\n" + "=" * 70)
+        print("❌ Copy Operation Failed")
+        print("=" * 70)
+        print(f"Error: {str(e)}")
+        print("\n💡 Troubleshooting:")
+        print(f"   - Verify source workspace '{ws_source}' and model '{model_name}' exist")
+        print(f"   - Verify destination workspace and lakehouse exist")
+        print(f"   - Ensure you have permissions for both workspaces")
+        print("=" * 70)
+        return 0
+
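For reference, the new `copy_model` convenience function in use, per its docstring (workspace and lakehouse names are placeholders):

```python
from duckrun.semantic_model import copy_model

# Same-name copy into another workspace's lakehouse
copy_model("Source WS", "Sales Model", "Target WS/Data Lake.lakehouse/dbo")

# Copy under a new name, with a longer wait before refresh
copy_model("Source WS", "Sales Model", "Target WS/Data Lake.lakehouse/dbo",
           new_model_name="Sales Model - Copy", wait_seconds=10)
```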
duckrun/stats.py
CHANGED
@@ -4,6 +4,7 @@ Delta Lake table statistics functionality for duckrun
 import duckdb
 from deltalake import DeltaTable
 from datetime import datetime
+import pyarrow as pa
 
 
 def _table_exists(duckrun_instance, schema_name: str, table_name: str) -> bool:
@@ -149,17 +150,23 @@ def get_stats(duckrun_instance, source: str):
         dt = DeltaTable(table_path)
         add_actions = dt.get_add_actions(flatten=True)
 
-        # Convert to dict - …
-        # …
+        # Convert RecordBatch to dict - works with both PyArrow (deltalake 0.18.2) and arro3 (newer versions)
+        # Strategy: Use duck typing - try direct conversion first, then manual extraction
+        # This works because both PyArrow and arro3 RecordBatches have schema and column() methods
+
         try:
+            # Old deltalake (0.18.2): PyArrow RecordBatch has to_pydict() directly
             xx = add_actions.to_pydict()
         except AttributeError:
-            # New …
-            …
-            if …
-            # …
-            …
+            # New deltalake with arro3: Use schema and column() methods
+            # This is the universal approach that works with both PyArrow and arro3
+            if hasattr(add_actions, 'schema') and hasattr(add_actions, 'column'):
+                # Extract columns manually and create PyArrow table
+                arrow_table = pa.table({name: add_actions.column(name) for name in add_actions.schema.names})
+                xx = arrow_table.to_pydict()
             else:
+                # Fallback: empty dict (shouldn't happen)
+                print(f"Warning: Could not convert RecordBatch for table '{tbl}': Unexpected type {type(add_actions)}")
                 xx = {}
 
         # Check if VORDER exists
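The shim above converts the add-actions RecordBatch to a plain dict whether it comes from PyArrow (deltalake 0.18.x) or arro3 (newer deltalake). The same duck-typing idea as a standalone sketch; the helper name is hypothetical:

```python
import pyarrow as pa

def record_batch_to_pydict(batch):
    """Hypothetical helper mirroring the shim in duckrun/stats.py."""
    try:
        # PyArrow RecordBatch exposes to_pydict() directly
        return batch.to_pydict()
    except AttributeError:
        # arro3 RecordBatch: rebuild via schema/column accessors, then convert
        return pa.table({name: batch.column(name) for name in batch.schema.names}).to_pydict()
```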
{duckrun-0.2.9.dev4.dist-info → duckrun-0.2.10.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: duckrun
-Version: 0.2.9.dev4
+Version: 0.2.10
 Summary: Lakehouse task runner powered by DuckDB for Microsoft Fabric
 Author: mim
 License: MIT
@@ -420,6 +420,37 @@ success = con.run(pipeline)  # Returns True only if ALL tasks succeed
 
 This prevents downstream tasks from processing incomplete or corrupted data.
 
+### Semantic Model Deployment
+
+Deploy Power BI semantic models directly from BIM files using DirectLake mode:
+
+```python
+# Connect to lakehouse
+con = duckrun.connect("Analytics/Sales.lakehouse/dbo")
+
+# Deploy with auto-generated name (lakehouse_schema)
+con.deploy("https://raw.githubusercontent.com/user/repo/main/model.bim")
+
+# Deploy with custom name
+con.deploy(
+    "https://raw.githubusercontent.com/user/repo/main/sales_model.bim",
+    dataset_name="Sales Analytics Model",
+    wait_seconds=10  # Wait for permission propagation
+)
+```
+
+**Features:**
+- 🚀 **DirectLake Mode**: Deploys semantic models with DirectLake connection
+- 🔄 **Automatic Configuration**: Auto-configures workspace, lakehouse, and schema connections
+- 📦 **BIM from URL**: Load model definitions from GitHub or any accessible URL
+- ⏱️ **Permission Handling**: Configurable wait time for permission propagation
+
+**Use Cases:**
+- Deploy semantic models as part of CI/CD pipelines
+- Version control your semantic models in Git
+- Automated model deployment across environments
+- Streamline DirectLake model creation
+
 ### Delta Lake Optimization
 
 Duckrun automatically:
@@ -534,6 +565,12 @@ con.sql("""
 
 # 5. Download processed files for external systems
 con.download("processed_reports", "./exports", ['.csv'])
+
+# 6. Deploy semantic model for Power BI
+con.deploy(
+    "https://raw.githubusercontent.com/user/repo/main/sales_model.bim",
+    dataset_name="Sales Analytics"
+)
 ```
 
 **This example demonstrates:**
@@ -541,8 +578,9 @@ con.download("processed_reports", "./exports", ['.csv'])
 - 🔄 **Pipeline orchestration** with SQL and Python tasks
 - ⚡ **Fast data exploration** with DuckDB
 - 💾 **Delta table creation** with Spark-style API
-- …
-- …
+- 🔀 **Schema evolution** and partitioning
+- 📤 **File downloads** from OneLake Files
+- 📊 **Semantic model deployment** with DirectLake
 
 ## Schema Evolution & Partitioning Guide
 
duckrun-0.2.10.dist-info/RECORD
ADDED
@@ -0,0 +1,14 @@
+duckrun/__init__.py,sha256=cTj6KQ6hKmgu1z7k9nhDcO5lct049luxjx1V0QnymCo,235
+duckrun/auth.py,sha256=qPaLQ7InlV9leA9r6E6VEeYavFFoBi0zSN8m_l1aoQs,9545
+duckrun/core.py,sha256=g9WtvhROxFSo2Idb979fY5HhxbMm_x-tajc_zWMtqCU,46853
+duckrun/files.py,sha256=Fvdjg3DyHJzIVzKo8M_j-eGz4zU61lOB38Y_onbQJkI,10137
+duckrun/lakehouse.py,sha256=j--Z3zo8AOWt1GF9VzRosmmTAy6ey2D0LVubti58twU,14109
+duckrun/runner.py,sha256=yrDxfy1RVkb8iK9GKGmIFZHzCvcO_0GVQlbng7Vw_iM,14171
+duckrun/semantic_model.py,sha256=obzlN2-dbEW3JmDop-vrZGGGLi9u3ThhTbgtDjou7uY,29509
+duckrun/stats.py,sha256=oKIjZ7u5cFVT63FuOl5UqoDsOG3098woSCn-uI6i_sQ,11084
+duckrun/writer.py,sha256=svUuPCYOhrz299NgnpTKhARKjfej0PxnoND2iPDSypk,8098
+duckrun-0.2.10.dist-info/licenses/LICENSE,sha256=-DeQQwdbCbkB4507ZF3QbocysB-EIjDtaLexvqRkGZc,1083
+duckrun-0.2.10.dist-info/METADATA,sha256=CwDyjJqyfBoISxZ1bfdojVVsP0HcrLylgqCTpMsC6e8,20624
+duckrun-0.2.10.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+duckrun-0.2.10.dist-info/top_level.txt,sha256=BknMEwebbUHrVAp3SC92ps8MPhK7XSYsaogTvi_DmEU,8
+duckrun-0.2.10.dist-info/RECORD,,
duckrun-0.2.9.dev4.dist-info/RECORD
REMOVED
@@ -1,14 +0,0 @@
-duckrun/__init__.py,sha256=VJAx606MLj6SVHu3nVePEO0BBp0WxCBtgk_U1olMU7g,235
-duckrun/auth.py,sha256=qPaLQ7InlV9leA9r6E6VEeYavFFoBi0zSN8m_l1aoQs,9545
-duckrun/core.py,sha256=CrWMgA1QHvVF2AAlTlBlQ7VfKsuakcqZa4VuX2WJmik,39279
-duckrun/files.py,sha256=Fvdjg3DyHJzIVzKo8M_j-eGz4zU61lOB38Y_onbQJkI,10137
-duckrun/lakehouse.py,sha256=j--Z3zo8AOWt1GF9VzRosmmTAy6ey2D0LVubti58twU,14109
-duckrun/runner.py,sha256=yrDxfy1RVkb8iK9GKGmIFZHzCvcO_0GVQlbng7Vw_iM,14171
-duckrun/semantic_model.py,sha256=4_VgsXAHaWhqxI2kOSB2UtRLa6CoBYFEXt418j5xce0,16739
-duckrun/stats.py,sha256=CXfb2DWF3PgOckelJooU0y-BAsNT9NFDfDYEmo0mUQQ,10473
-duckrun/writer.py,sha256=svUuPCYOhrz299NgnpTKhARKjfej0PxnoND2iPDSypk,8098
-duckrun-0.2.9.dev4.dist-info/licenses/LICENSE,sha256=-DeQQwdbCbkB4507ZF3QbocysB-EIjDtaLexvqRkGZc,1083
-duckrun-0.2.9.dev4.dist-info/METADATA,sha256=iBsF-oRskhqicNpd3i5NJq0XZxTepDERJ3i_LVV0rZ4,19277
-duckrun-0.2.9.dev4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-duckrun-0.2.9.dev4.dist-info/top_level.txt,sha256=BknMEwebbUHrVAp3SC92ps8MPhK7XSYsaogTvi_DmEU,8
-duckrun-0.2.9.dev4.dist-info/RECORD,,
{duckrun-0.2.9.dev4.dist-info → duckrun-0.2.10.dist-info}/WHEEL
File without changes
{duckrun-0.2.9.dev4.dist-info → duckrun-0.2.10.dist-info}/licenses/LICENSE
File without changes
{duckrun-0.2.9.dev4.dist-info → duckrun-0.2.10.dist-info}/top_level.txt
File without changes