duckrun 0.2.9.dev4__py3-none-any.whl → 0.2.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
duckrun/__init__.py CHANGED
@@ -2,7 +2,7 @@
 
 from duckrun.core import Duckrun
 
-__version__ = "0.2.9.dev4"
+__version__ = "0.2.9.dev5"
 
 # Expose unified connect method at module level
 connect = Duckrun.connect
duckrun/core.py CHANGED
@@ -2,6 +2,8 @@ import duckdb
 import requests
 import os
 import importlib.util
+import json
+import time
 from deltalake import DeltaTable, write_deltalake
 from typing import List, Tuple, Union, Optional, Callable, Dict, Any
 from string import Template
@@ -702,8 +704,11 @@ class Duckrun:
         Deploy a semantic model from a BIM file using DirectLake mode.
 
         Args:
-            bim_url: URL to the BIM file (e.g., GitHub raw URL)
-            dataset_name: Name for the semantic model (default: lakehouse_schema)
+            bim_url: Can be:
+                - URL: "https://raw.githubusercontent.com/.../model.bim"
+                - Local file: "model.bim"
+                - Workspace/Model: "workspace_name/model_name"
+            dataset_name: Name for the semantic model (default: source model name if workspace/model format, else lakehouse_schema)
             wait_seconds: Seconds to wait for permission propagation (default: 5)
 
         Returns:
@@ -712,18 +717,28 @@ class Duckrun:
         Examples:
             dr = Duckrun.connect("My Workspace/My Lakehouse.lakehouse/dbo")
 
-            # Deploy with auto-generated name
-            dr.deploy("https://raw.githubusercontent.com/.../model.bim")
+            # Deploy from workspace/model (uses same name by default)
+            dr.deploy("Source Workspace/Source Model")  # Creates "Source Model"
 
             # Deploy with custom name
-            dr.deploy("https://raw.githubusercontent.com/.../model.bim",
-                      dataset_name="Sales Model")
+            dr.deploy("Source Workspace/Source Model", dataset_name="Sales Model Copy")
+
+            # Deploy from URL or local file
+            dr.deploy("https://raw.githubusercontent.com/.../model.bim", dataset_name="My Model")
         """
         from .semantic_model import deploy_semantic_model
 
         # Auto-generate dataset name if not provided
         if dataset_name is None:
-            dataset_name = f"{self.lakehouse_name}_{self.schema}"
+            # If using workspace/model format, use the model name
+            if "/" in bim_url and not bim_url.startswith(('http://', 'https://')):
+                parts = bim_url.split("/")
+                if len(parts) == 2:
+                    dataset_name = parts[1]  # Use the model name
+                else:
+                    dataset_name = f"{self.lakehouse_name}_{self.schema}"
+            else:
+                dataset_name = f"{self.lakehouse_name}_{self.schema}"
 
         # Call the deployment function (DirectLake only)
         return deploy_semantic_model(
@@ -731,7 +746,7 @@ class Duckrun:
             lakehouse_name_or_id=self.lakehouse_name,
             schema_name=self.schema,
             dataset_name=dataset_name,
-            bim_url=bim_url,
+            bim_url_or_path=bim_url,
             wait_seconds=wait_seconds
         )
@@ -864,6 +879,145 @@ class WorkspaceConnection:
             print(f"❌ Error creating lakehouse '{lakehouse_name}': {e}")
             return False
 
+    def download_bim(self, semantic_model_name: str, output_path: Optional[str] = None) -> Optional[str]:
+        """
+        Download a semantic model as a BIM (Business Intelligence Model) file.
+
+        Args:
+            semantic_model_name: Name of the semantic model to download
+            output_path: Optional path to save the BIM file. If not provided, returns the BIM content as JSON string
+
+        Returns:
+            BIM content as JSON string if output_path is None, or the file path if saved successfully
+
+        Example:
+            con = duckrun.connect("My Workspace")
+            # Get BIM content as string
+            bim_content = con.download_bim("Sales Model")
+            # Or save to file
+            con.download_bim("Sales Model", "sales_model.bim")
+        """
+        try:
+            # Get authentication token
+            from .auth import get_fabric_api_token
+            token = get_fabric_api_token()
+            if not token:
+                print("❌ Failed to authenticate for downloading semantic model")
+                return None
+
+            # Resolve workspace name to ID
+            workspace_id = self._get_workspace_id_by_name(token, self.workspace_name)
+            if not workspace_id:
+                print(f"❌ Workspace '{self.workspace_name}' not found")
+                return None
+
+            # Get semantic model ID
+            print(f"🔍 Looking for semantic model '{semantic_model_name}'...")
+            url = f"https://api.fabric.microsoft.com/v1/workspaces/{workspace_id}/semanticModels"
+            headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
+
+            response = requests.get(url, headers=headers)
+            response.raise_for_status()
+
+            models = response.json().get("value", [])
+            model = next((m for m in models if m.get("displayName") == semantic_model_name), None)
+
+            if not model:
+                print(f"❌ Semantic model '{semantic_model_name}' not found in workspace '{self.workspace_name}'")
+                return None
+
+            model_id = model.get("id")
+            print(f"✓ Found semantic model: {semantic_model_name} (ID: {model_id})")
+
+            # Get the model definition using the generic items API
+            print("📥 Downloading BIM definition...")
+            definition_url = f"https://api.fabric.microsoft.com/v1/workspaces/{workspace_id}/items/{model_id}/getDefinition"
+
+            # POST request to get definition with TMSL format (which includes model.bim)
+            # Note: format parameter should be in query string, not body
+            response = requests.post(f"{definition_url}?format=TMSL", headers=headers)
+            response.raise_for_status()
+
+            # Handle long-running operation if needed
+            if response.status_code == 202:
+                operation_id = response.headers.get('x-ms-operation-id')
+                print(f"   Waiting for operation to complete...")
+
+                max_attempts = 30
+                for attempt in range(max_attempts):
+                    time.sleep(2)
+
+                    # Get operation result
+                    result_url = f"https://api.fabric.microsoft.com/v1/operations/{operation_id}/result"
+                    result_response = requests.get(result_url, headers=headers)
+
+                    # Check operation status
+                    status_url = f"https://api.fabric.microsoft.com/v1/operations/{operation_id}"
+                    status_response = requests.get(status_url, headers=headers)
+                    status = status_response.json().get('status')
+
+                    if status == 'Succeeded':
+                        result_data = result_response.json()
+                        break
+                    elif status == 'Failed':
+                        error = status_response.json().get('error', {})
+                        print(f"❌ Operation failed: {error.get('message')}")
+                        return None
+                    elif attempt == max_attempts - 1:
+                        print("❌ Operation timed out")
+                        return None
+            else:
+                result_data = response.json()
+
+            # Extract BIM content from definition
+            definition = result_data.get('definition', {})
+            parts = definition.get('parts', [])
+
+            # Debug: show what parts we have
+            if not parts:
+                print("❌ No definition parts found in response")
+                print(f"   Result data keys: {list(result_data.keys())}")
+                print(f"   Definition keys: {list(definition.keys()) if definition else 'None'}")
+                return None
+
+            print(f"   Found {len(parts)} definition parts:")
+            for part in parts:
+                print(f"   - {part.get('path', 'unknown')}")
+
+            bim_part = next((p for p in parts if p.get('path', '').endswith('.bim')), None)
+            if not bim_part:
+                print("❌ No BIM file found in semantic model definition")
+                print(f"   Looking for files ending with '.bim', found: {[p.get('path') for p in parts]}")
+                return None
+
+            # Decode the BIM content (it's base64 encoded)
+            import base64
+            bim_payload = bim_part.get('payload', '')
+            bim_content = base64.b64decode(bim_payload).decode('utf-8')
+            bim_json = json.loads(bim_content)
+
+            # Format as pretty JSON
+            bim_formatted = json.dumps(bim_json, indent=2)
+
+            print(f"✓ BIM file downloaded successfully")
+            print(f"   - Tables: {len(bim_json.get('model', {}).get('tables', []))}")
+            print(f"   - Relationships: {len(bim_json.get('model', {}).get('relationships', []))}")
+
+            # Save to file or return content
+            if output_path:
+                with open(output_path, 'w', encoding='utf-8') as f:
+                    f.write(bim_formatted)
+                print(f"✓ Saved to: {output_path}")
+                return output_path
+            else:
+                return bim_formatted
+
+        except Exception as e:
+            print(f"❌ Error downloading semantic model: {e}")
+            import traceback
+            traceback.print_exc()
+            return None
+
     def _get_workspace_id_by_name(self, token: str, workspace_name: str) -> Optional[str]:
         """Helper method to get workspace ID from name"""
         try:
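
The new `download_bim` method pairs naturally with `deploy` for a download-then-redeploy round trip. A sketch using the docstring's own connection pattern (workspace, lakehouse, and model names are placeholders):

```python
import duckrun

# Workspace-level connection, as in the docstring example above
con = duckrun.connect("My Workspace")

# Save the model definition locally; returns the path on success, None on failure
path = con.download_bim("Sales Model", "sales_model.bim")

if path:
    # Redeploy the saved BIM against a lakehouse connection
    lh = duckrun.connect("My Workspace/My Lakehouse.lakehouse/dbo")
    lh.deploy(path, dataset_name="Sales Model Copy")
```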
duckrun/semantic_model.py CHANGED
@@ -130,7 +130,7 @@ def check_dataset_exists(dataset_name, workspace_id, client):
 
 
 def refresh_dataset(dataset_name, workspace_id, client, dataset_id=None):
-    """Refresh a dataset and monitor progress"""
+    """Refresh a dataset and monitor progress using Power BI API"""
 
     # If dataset_id not provided, look it up by name
     if not dataset_id:
@@ -144,48 +144,149 @@ def refresh_dataset(dataset_name, workspace_id, client, dataset_id=None):
         "objects": []
     }
 
-    response = client.post(
-        f"/v1/workspaces/{workspace_id}/semanticModels/{dataset_id}/refreshes",
-        json=payload
-    )
+    # Use Power BI API for refresh (not Fabric API)
+    powerbi_url = f"https://api.powerbi.com/v1.0/myorg/datasets/{dataset_id}/refreshes"
+    headers = client._get_headers()
+
+    response = requests.post(powerbi_url, headers=headers, json=payload)
 
     if response.status_code in [200, 202]:
         print(f"✓ Refresh initiated")
 
-        refresh_id = response.json().get('id')
-        if refresh_id:
-            print("   Monitoring refresh progress...")
-            max_attempts = 60
+        # For 202, get the refresh_id from the Location header
+        if response.status_code == 202:
+            location = response.headers.get('Location')
+            if location:
+                refresh_id = location.split('/')[-1]
+                print("   Monitoring refresh progress...")
+                max_attempts = 60
+                for attempt in range(max_attempts):
+                    time.sleep(5)
+
+                    # Check refresh status using Power BI API
+                    status_url = f"https://api.powerbi.com/v1.0/myorg/datasets/{dataset_id}/refreshes/{refresh_id}"
+                    status_response = requests.get(status_url, headers=headers)
+                    status_response.raise_for_status()
+                    status = status_response.json().get('status')
+
+                    if status == 'Completed':
+                        print(f"✓ Refresh completed successfully")
+                        return
+                    elif status == 'Failed':
+                        error = status_response.json().get('serviceExceptionJson', '')
+                        raise Exception(f"Refresh failed: {error}")
+                    elif status == 'Cancelled':
+                        raise Exception("Refresh was cancelled")
+
+                    if attempt % 6 == 0:
+                        print(f"   Status: {status}...")
+
+                raise Exception(f"Refresh timed out")
+    else:
+        response.raise_for_status()
+
+
+def download_bim_from_github(url_or_path):
+    """
+    Load BIM file from URL, local file path, or workspace/model format.
+
+    Args:
+        url_or_path: Can be:
+            - Local file path: "model.bim"
+            - URL: "https://..."
+            - Workspace/Model: "workspace_name/semantic_model_name"
+
+    Returns:
+        BIM content as dictionary
+    """
+    import os
+    import tempfile
+
+    # Check if it's a local file path
+    if os.path.exists(url_or_path):
+        print(f"Loading BIM file from local path...")
+        with open(url_or_path, 'r', encoding='utf-8') as f:
+            bim_content = json.load(f)
+        print(f"✓ BIM file loaded from: {url_or_path}")
+    # Check if it's a URL
+    elif url_or_path.startswith(('http://', 'https://')):
+        print(f"Downloading BIM file from URL...")
+        response = requests.get(url_or_path)
+        response.raise_for_status()
+        bim_content = response.json()
+        print(f"✓ BIM file downloaded from URL")
+    # Check if it's workspace/model format
+    elif "/" in url_or_path and not os.path.exists(url_or_path):
+        print(f"Downloading BIM from workspace/model...")
+        parts = url_or_path.split("/")
+        if len(parts) != 2:
+            raise ValueError(f"Invalid workspace/model format: '{url_or_path}'. Expected: 'workspace_name/model_name'")
+
+        ws_name, model_name = parts
+
+        # Download BIM from the semantic model
+        client = FabricRestClient()
+        ws_id = get_workspace_id(ws_name, client)
+
+        # Get semantic model ID
+        response = client.get(f"/v1/workspaces/{ws_id}/semanticModels")
+        models = response.json().get('value', [])
+        model = next((m for m in models if m.get('displayName') == model_name), None)
+
+        if not model:
+            raise ValueError(f"Semantic model '{model_name}' not found in workspace '{ws_name}'")
+
+        model_id = model.get('id')
+
+        # Get definition using Items API with TMSL format
+        definition_url = f"https://api.fabric.microsoft.com/v1/workspaces/{ws_id}/items/{model_id}/getDefinition"
+        headers = client._get_headers()
+        response = requests.post(f"{definition_url}?format=TMSL", headers=headers)
+        response.raise_for_status()
+
+        # Handle long-running operation
+        if response.status_code == 202:
+            operation_id = response.headers.get('x-ms-operation-id')
+            max_attempts = 30
+
             for attempt in range(max_attempts):
-                time.sleep(5)
+                time.sleep(2)
 
-                status_response = client.get(
-                    f"/v1/workspaces/{workspace_id}/semanticModels/{dataset_id}/refreshes/{refresh_id}"
-                )
+                status_url = f"https://api.fabric.microsoft.com/v1/operations/{operation_id}"
+                status_response = requests.get(status_url, headers=headers)
                 status = status_response.json().get('status')
 
-                if status == 'Completed':
-                    print(f"✓ Refresh completed successfully")
-                    return
+                if status == 'Succeeded':
+                    result_url = f"https://api.fabric.microsoft.com/v1/operations/{operation_id}/result"
+                    result_response = requests.get(result_url, headers=headers)
+                    result_data = result_response.json()
+                    break
                 elif status == 'Failed':
                     error = status_response.json().get('error', {})
-                    raise Exception(f"Refresh failed: {error.get('message', 'Unknown error')}")
-                elif status == 'Cancelled':
-                    raise Exception("Refresh was cancelled")
-
-                if attempt % 6 == 0:
-                    print(f"   Status: {status}...")
-
-            raise Exception(f"Refresh timed out")
-
-
-def download_bim_from_github(url):
-    """Download BIM file from URL"""
-    print(f"Downloading BIM file...")
-    response = requests.get(url)
-    response.raise_for_status()
-    bim_content = response.json()
-    print(f"✓ BIM file downloaded")
+                    raise Exception(f"Download operation failed: {error.get('message')}")
+                elif attempt == max_attempts - 1:
+                    raise Exception("Download operation timed out")
+        else:
+            result_data = response.json()
+
+        # Extract BIM content
+        definition = result_data.get('definition', {})
+        parts = definition.get('parts', [])
+
+        bim_part = next((p for p in parts if p.get('path', '').endswith('.bim')), None)
+        if not bim_part:
+            raise Exception("No BIM file found in semantic model definition")
+
+        # Decode BIM
+        import base64
+        bim_payload = bim_part.get('payload', '')
+        bim_content_str = base64.b64decode(bim_payload).decode('utf-8')
+        bim_content = json.loads(bim_content_str)
+
+        print(f"✓ BIM downloaded from {ws_name}/{model_name}")
+    else:
+        raise ValueError(f"Invalid BIM source: '{url_or_path}'. Must be a valid file path, URL, or 'workspace/model' format.")
+
     print(f"   - Tables: {len(bim_content.get('model', {}).get('tables', []))}")
     print(f"   - Relationships: {len(bim_content.get('model', {}).get('relationships', []))}")
     return bim_content
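
The reworked loader accepts three source shapes, tried in order: an existing local file, an `http(s)` URL, and finally `workspace/model`. A usage sketch (names are placeholders):

```python
from duckrun.semantic_model import download_bim_from_github

bim = download_bim_from_github("model.bim")                          # local file
bim = download_bim_from_github(
    "https://raw.githubusercontent.com/user/repo/main/model.bim")    # raw URL
bim = download_bim_from_github("Source Workspace/Source Model")      # Fabric Items API

# All three branches return the parsed BIM as a dict
print(len(bim.get('model', {}).get('tables', [])))
```

Because the local-path check runs first, a file on disk whose relative path happens to contain one slash wins over the workspace/model interpretation.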
@@ -292,23 +393,27 @@ def create_dataset_from_bim(dataset_name, bim_content, workspace_id, client):
     for attempt in range(max_attempts):
         time.sleep(2)
 
-        # Get operation result (not just status)
-        result_response = client.get(f"/v1/operations/{operation_id}/result")
-
         # Check if operation is complete by getting the status
         status_response = client.get(f"/v1/operations/{operation_id}")
         status = status_response.json().get('status')
 
         if status == 'Succeeded':
             print(f"✓ Operation completed")
-            # Return the created dataset ID from the result
-            result_data = result_response.json()
-            dataset_id = result_data.get('id')
-            if dataset_id:
-                return dataset_id
-            else:
-                # Fallback: search for the dataset by name
-                return get_dataset_id(dataset_name, workspace_id, client)
+
+            # Now get the result (only after status is Succeeded)
+            try:
+                result_response = client.get(f"/v1/operations/{operation_id}/result")
+                result_data = result_response.json()
+                dataset_id = result_data.get('id')
+                if dataset_id:
+                    return dataset_id
+            except:
+                # If result endpoint fails, fallback to searching by name
+                pass
+
+            # Fallback: search for the dataset by name
+            return get_dataset_id(dataset_name, workspace_id, client)
+
         elif status == 'Failed':
             error = status_response.json().get('error', {})
             raise Exception(f"Operation failed: {error.get('message')}")
@@ -326,7 +431,7 @@ def create_dataset_from_bim(dataset_name, bim_content, workspace_id, client):
 
 
 def deploy_semantic_model(workspace_name_or_id, lakehouse_name_or_id, schema_name, dataset_name,
-                          bim_url, wait_seconds=5):
+                          bim_url_or_path, wait_seconds=5):
     """
     Deploy a semantic model using DirectLake mode.
 
@@ -335,15 +440,20 @@ def deploy_semantic_model(workspace_name_or_id, lakehouse_name_or_id, schema_name,
         lakehouse_name_or_id: Name or GUID of the lakehouse
         schema_name: Schema name (e.g., 'dbo', 'staging')
         dataset_name: Name for the semantic model
-        bim_url: URL to the BIM file
+        bim_url_or_path: URL to the BIM file or local file path (e.g., 'model.bim' or 'https://...')
        wait_seconds: Seconds to wait before refresh (default: 5)
 
     Returns:
         1 for success, 0 for failure
 
     Examples:
+        # Using a URL
         dr = Duckrun.connect("My Workspace/My Lakehouse.lakehouse/dbo")
         dr.deploy("https://raw.githubusercontent.com/.../model.bim")
+
+        # Using a local file
+        dr.deploy("./my_model.bim")
+        dr.deploy("C:/path/to/model.bim")
     """
     print("=" * 70)
     print("Semantic Model Deployment (DirectLake)")
@@ -382,8 +492,8 @@ def deploy_semantic_model(workspace_name_or_id, lakehouse_name_or_id, schema_name,
     lakehouse_id = get_lakehouse_id(lakehouse_name_or_id, workspace_id, client)
 
     # Step 4: Download and update BIM
-    print("\n[Step 4/6] Downloading and configuring BIM file...")
-    bim_content = download_bim_from_github(bim_url)
+    print("\n[Step 4/6] Loading and configuring BIM file...")
+    bim_content = download_bim_from_github(bim_url_or_path)
 
     modified_bim = update_bim_for_directlake(bim_content, workspace_id, lakehouse_id, schema_name)
     modified_bim['name'] = dataset_name
@@ -425,3 +535,187 @@ def deploy_semantic_model(workspace_name_or_id, lakehouse_name_or_id, schema_name,
     print(f"   - Check tables are in Delta format")
     print("=" * 70)
     return 0
+
+
+def copy_model(ws_source, model_name, destination, new_model_name=None, wait_seconds=5):
+    """
+    Copy a semantic model from one workspace to another.
+
+    This is a convenience function that downloads a BIM file from a source workspace
+    and deploys it to a destination lakehouse in one operation.
+
+    Args:
+        ws_source: Source workspace name or GUID
+        model_name: Name of the semantic model to copy
+        destination: Destination in format "workspace/lakehouse.lakehouse/schema"
+        new_model_name: Name for the new semantic model (default: same as source)
+        wait_seconds: Seconds to wait before refresh (default: 5)
+
+    Returns:
+        1 for success, 0 for failure
+
+    Examples:
+        # Copy to same workspace, different lakehouse
+        copy_model("My Workspace", "Sales Model", "My Workspace/Target Lakehouse.lakehouse/dbo")
+
+        # Copy to different workspace with new name
+        copy_model("Source WS", "Production Model", "Target WS/Data Lake.lakehouse/analytics",
+                   new_model_name="Production Model - Copy")
+
+        # Using the connect pattern
+        import duckrun
+        duckrun.semantic_model.copy_model("Source", "Model", "Target/LH.lakehouse/dbo")
+    """
+    import tempfile
+    import os
+
+    print("=" * 70)
+    print("Semantic Model Copy Operation")
+    print("=" * 70)
+
+    try:
+        # Parse destination
+        parts = destination.split("/")
+        if len(parts) != 3:
+            raise ValueError(
+                f"Invalid destination format: '{destination}'. "
+                "Expected format: 'workspace/lakehouse.lakehouse/schema'"
+            )
+
+        ws_dest, lakehouse, schema = parts
+
+        # Remove .lakehouse suffix if present
+        if lakehouse.endswith(".lakehouse"):
+            lakehouse = lakehouse[:-10]
+
+        # Use source model name if new name not provided
+        if not new_model_name:
+            new_model_name = model_name
+
+        print(f"\nSource:")
+        print(f"   Workspace: {ws_source}")
+        print(f"   Model: {model_name}")
+        print(f"\nDestination:")
+        print(f"   Workspace: {ws_dest}")
+        print(f"   Lakehouse: {lakehouse}")
+        print(f"   Schema: {schema}")
+        print(f"   New Model Name: {new_model_name}")
+
+        # Step 1: Download BIM from source
+        print("\n" + "-" * 70)
+        print("[Step 1/2] Downloading BIM from source workspace...")
+        print("-" * 70)
+
+        client = FabricRestClient()
+        ws_source_id = get_workspace_id(ws_source, client)
+
+        # Use temporary file for BIM content
+        with tempfile.NamedTemporaryFile(mode='w', suffix='.bim', delete=False, encoding='utf-8') as tmp_file:
+            temp_bim_path = tmp_file.name
+
+            # Get semantic model ID
+            response = client.get(f"/v1/workspaces/{ws_source_id}/semanticModels")
+            models = response.json().get('value', [])
+            model = next((m for m in models if m.get('displayName') == model_name), None)
+
+            if not model:
+                raise ValueError(f"Semantic model '{model_name}' not found in workspace '{ws_source}'")
+
+            model_id = model.get('id')
+            print(f"✓ Found source model: {model_name} (ID: {model_id})")
+
+            # Get definition using Items API with TMSL format
+            print("   Downloading BIM definition...")
+            definition_url = f"https://api.fabric.microsoft.com/v1/workspaces/{ws_source_id}/items/{model_id}/getDefinition"
+            headers = client._get_headers()
+            response = requests.post(f"{definition_url}?format=TMSL", headers=headers)
+            response.raise_for_status()
+
+            # Handle long-running operation
+            if response.status_code == 202:
+                operation_id = response.headers.get('x-ms-operation-id')
+                max_attempts = 30
+
+                for attempt in range(max_attempts):
+                    time.sleep(2)
+
+                    status_url = f"https://api.fabric.microsoft.com/v1/operations/{operation_id}"
+                    status_response = requests.get(status_url, headers=headers)
+                    status = status_response.json().get('status')
+
+                    if status == 'Succeeded':
+                        result_url = f"https://api.fabric.microsoft.com/v1/operations/{operation_id}/result"
+                        result_response = requests.get(result_url, headers=headers)
+                        result_data = result_response.json()
+                        break
+                    elif status == 'Failed':
+                        error = status_response.json().get('error', {})
+                        raise Exception(f"Download operation failed: {error.get('message')}")
+                    elif attempt == max_attempts - 1:
+                        raise Exception("Download operation timed out")
+            else:
+                result_data = response.json()
+
+            # Extract BIM content
+            definition = result_data.get('definition', {})
+            parts = definition.get('parts', [])
+
+            bim_part = next((p for p in parts if p.get('path', '').endswith('.bim')), None)
+            if not bim_part:
+                raise Exception("No BIM file found in semantic model definition")
+
+            # Decode and save BIM
+            import base64
+            bim_payload = bim_part.get('payload', '')
+            bim_content = base64.b64decode(bim_payload).decode('utf-8')
+            bim_json = json.loads(bim_content)
+
+            # Write to temp file
+            json.dump(bim_json, tmp_file, indent=2)
+
+        print(f"✓ BIM downloaded successfully")
+        print(f"   - Tables: {len(bim_json.get('model', {}).get('tables', []))}")
+        print(f"   - Relationships: {len(bim_json.get('model', {}).get('relationships', []))}")
+
+        # Step 2: Deploy to destination
+        print("\n" + "-" * 70)
+        print("[Step 2/2] Deploying to destination workspace...")
+        print("-" * 70)
+
+        result = deploy_semantic_model(
+            workspace_name_or_id=ws_dest,
+            lakehouse_name_or_id=lakehouse,
+            schema_name=schema,
+            dataset_name=new_model_name,
+            bim_url_or_path=temp_bim_path,
+            wait_seconds=wait_seconds
+        )
+
+        # Clean up temp file
+        try:
+            os.unlink(temp_bim_path)
+        except:
+            pass
+
+        if result == 1:
+            print("\n" + "=" * 70)
+            print("🎉 Copy Operation Completed!")
+            print("=" * 70)
+            print(f"Source: {ws_source}/{model_name}")
+            print(f"Destination: {ws_dest}/{lakehouse}/{schema}/{new_model_name}")
+            print("=" * 70)
+
+        return result
+
+    except Exception as e:
+        print("\n" + "=" * 70)
+        print("❌ Copy Operation Failed")
+        print("=" * 70)
+        print(f"Error: {str(e)}")
+        print("\n💡 Troubleshooting:")
+        print(f"   - Verify source workspace '{ws_source}' and model '{model_name}' exist")
+        print(f"   - Verify destination workspace and lakehouse exist")
+        print(f"   - Ensure you have permissions for both workspaces")
+        print("=" * 70)
+        return 0
+
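
A quick way to exercise the new helper end to end (workspace, lakehouse, and model names are placeholders):

```python
from duckrun.semantic_model import copy_model

# Clone a model into another workspace's lakehouse, keeping the source name
copy_model("Source WS", "Sales Model", "Target WS/Data Lake.lakehouse/dbo")

# Same copy, renamed, with a longer permission-propagation wait
ok = copy_model(
    "Source WS", "Sales Model",
    "Target WS/Data Lake.lakehouse/dbo",
    new_model_name="Sales Model - Copy",
    wait_seconds=15,
)
print("deployed" if ok == 1 else "failed")
```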
duckrun/stats.py CHANGED
@@ -4,6 +4,7 @@ Delta Lake table statistics functionality for duckrun
 import duckdb
 from deltalake import DeltaTable
 from datetime import datetime
+import pyarrow as pa
 
 
 def _table_exists(duckrun_instance, schema_name: str, table_name: str) -> bool:
@@ -149,17 +150,23 @@ def get_stats(duckrun_instance, source: str):
         dt = DeltaTable(table_path)
         add_actions = dt.get_add_actions(flatten=True)
 
-        # Convert to dict - compatible with both old and new deltalake versions
-        # Try to_pydict() first (old versions), fall back to to_pylist() (new versions)
+        # Convert RecordBatch to dict - works with both PyArrow (deltalake 0.18.2) and arro3 (newer versions)
+        # Strategy: Use duck typing - try direct conversion first, then manual extraction
+        # This works because both PyArrow and arro3 RecordBatches have schema and column() methods
+
         try:
+            # Old deltalake (0.18.2): PyArrow RecordBatch has to_pydict() directly
             xx = add_actions.to_pydict()
         except AttributeError:
-            # New version with arro3: use to_pylist() and convert to dict of lists
-            records = add_actions.to_pylist()
-            if records:
-                # Convert list of dicts to dict of lists
-                xx = {key: [record[key] for record in records] for key in records[0].keys()}
+            # New deltalake with arro3: Use schema and column() methods
+            # This is the universal approach that works with both PyArrow and arro3
+            if hasattr(add_actions, 'schema') and hasattr(add_actions, 'column'):
+                # Extract columns manually and create PyArrow table
+                arrow_table = pa.table({name: add_actions.column(name) for name in add_actions.schema.names})
+                xx = arrow_table.to_pydict()
             else:
+                # Fallback: empty dict (shouldn't happen)
+                print(f"Warning: Could not convert RecordBatch for table '{tbl}': Unexpected type {type(add_actions)}")
                 xx = {}
 
         # Check if VORDER exists
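
The compatibility shim above is easy to verify in isolation. A minimal sketch, assuming only that the batch exposes either `to_pydict()` (PyArrow) or `schema`/`column()` (arro3, mirroring the calls in the diff):

```python
import pyarrow as pa

def record_batch_to_pydict(batch):
    """Columnar dict from a PyArrow or arro3 RecordBatch."""
    try:
        return batch.to_pydict()  # PyArrow path (older deltalake)
    except AttributeError:
        # arro3 path: rebuild through PyArrow column by column
        return pa.table(
            {name: batch.column(name) for name in batch.schema.names}
        ).to_pydict()

# Exercises the PyArrow branch; the arro3 branch follows the same shape
batch = pa.RecordBatch.from_pydict({"size_bytes": [10, 20], "path": ["a", "b"]})
assert record_batch_to_pydict(batch) == {"size_bytes": [10, 20], "path": ["a", "b"]}
```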
duckrun-0.2.10.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: duckrun
-Version: 0.2.9.dev4
+Version: 0.2.10
 Summary: Lakehouse task runner powered by DuckDB for Microsoft Fabric
 Author: mim
 License: MIT
@@ -420,6 +420,37 @@ success = con.run(pipeline)  # Returns True only if ALL tasks succeed
 
 This prevents downstream tasks from processing incomplete or corrupted data.
 
+### Semantic Model Deployment
+
+Deploy Power BI semantic models directly from BIM files using DirectLake mode:
+
+```python
+# Connect to lakehouse
+con = duckrun.connect("Analytics/Sales.lakehouse/dbo")
+
+# Deploy with auto-generated name (lakehouse_schema)
+con.deploy("https://raw.githubusercontent.com/user/repo/main/model.bim")
+
+# Deploy with custom name
+con.deploy(
+    "https://raw.githubusercontent.com/user/repo/main/sales_model.bim",
+    dataset_name="Sales Analytics Model",
+    wait_seconds=10  # Wait for permission propagation
+)
+```
+
+**Features:**
+- 🚀 **DirectLake Mode**: Deploys semantic models with DirectLake connection
+- 🔄 **Automatic Configuration**: Auto-configures workspace, lakehouse, and schema connections
+- 📦 **BIM from URL**: Load model definitions from GitHub or any accessible URL
+- ⏱️ **Permission Handling**: Configurable wait time for permission propagation
+
+**Use Cases:**
+- Deploy semantic models as part of CI/CD pipelines
+- Version control your semantic models in Git
+- Automated model deployment across environments
+- Streamline DirectLake model creation
+
 ### Delta Lake Optimization
 
@@ -534,6 +565,12 @@ con.sql("""
534
565
 
535
566
  # 5. Download processed files for external systems
536
567
  con.download("processed_reports", "./exports", ['.csv'])
568
+
569
+ # 6. Deploy semantic model for Power BI
570
+ con.deploy(
571
+ "https://raw.githubusercontent.com/user/repo/main/sales_model.bim",
572
+ dataset_name="Sales Analytics"
573
+ )
537
574
  ```
538
575
 
539
576
  **This example demonstrates:**
@@ -541,8 +578,9 @@ con.download("processed_reports", "./exports", ['.csv'])
 - 🔄 **Pipeline orchestration** with SQL and Python tasks
 - ⚡ **Fast data exploration** with DuckDB
 - 💾 **Delta table creation** with Spark-style API
-- **Schema evolution** and partitioning
-- �📤 **File downloads** from OneLake Files
+- 🔀 **Schema evolution** and partitioning
+- 📤 **File downloads** from OneLake Files
+- 📊 **Semantic model deployment** with DirectLake
 
 ## Schema Evolution & Partitioning Guide
 
duckrun-0.2.10.dist-info/RECORD ADDED
@@ -0,0 +1,14 @@
+duckrun/__init__.py,sha256=cTj6KQ6hKmgu1z7k9nhDcO5lct049luxjx1V0QnymCo,235
+duckrun/auth.py,sha256=qPaLQ7InlV9leA9r6E6VEeYavFFoBi0zSN8m_l1aoQs,9545
+duckrun/core.py,sha256=g9WtvhROxFSo2Idb979fY5HhxbMm_x-tajc_zWMtqCU,46853
+duckrun/files.py,sha256=Fvdjg3DyHJzIVzKo8M_j-eGz4zU61lOB38Y_onbQJkI,10137
+duckrun/lakehouse.py,sha256=j--Z3zo8AOWt1GF9VzRosmmTAy6ey2D0LVubti58twU,14109
+duckrun/runner.py,sha256=yrDxfy1RVkb8iK9GKGmIFZHzCvcO_0GVQlbng7Vw_iM,14171
+duckrun/semantic_model.py,sha256=obzlN2-dbEW3JmDop-vrZGGGLi9u3ThhTbgtDjou7uY,29509
+duckrun/stats.py,sha256=oKIjZ7u5cFVT63FuOl5UqoDsOG3098woSCn-uI6i_sQ,11084
+duckrun/writer.py,sha256=svUuPCYOhrz299NgnpTKhARKjfej0PxnoND2iPDSypk,8098
+duckrun-0.2.10.dist-info/licenses/LICENSE,sha256=-DeQQwdbCbkB4507ZF3QbocysB-EIjDtaLexvqRkGZc,1083
+duckrun-0.2.10.dist-info/METADATA,sha256=CwDyjJqyfBoISxZ1bfdojVVsP0HcrLylgqCTpMsC6e8,20624
+duckrun-0.2.10.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+duckrun-0.2.10.dist-info/top_level.txt,sha256=BknMEwebbUHrVAp3SC92ps8MPhK7XSYsaogTvi_DmEU,8
+duckrun-0.2.10.dist-info/RECORD,,
duckrun-0.2.9.dev4.dist-info/RECORD DELETED
@@ -1,14 +0,0 @@
-duckrun/__init__.py,sha256=VJAx606MLj6SVHu3nVePEO0BBp0WxCBtgk_U1olMU7g,235
-duckrun/auth.py,sha256=qPaLQ7InlV9leA9r6E6VEeYavFFoBi0zSN8m_l1aoQs,9545
-duckrun/core.py,sha256=CrWMgA1QHvVF2AAlTlBlQ7VfKsuakcqZa4VuX2WJmik,39279
-duckrun/files.py,sha256=Fvdjg3DyHJzIVzKo8M_j-eGz4zU61lOB38Y_onbQJkI,10137
-duckrun/lakehouse.py,sha256=j--Z3zo8AOWt1GF9VzRosmmTAy6ey2D0LVubti58twU,14109
-duckrun/runner.py,sha256=yrDxfy1RVkb8iK9GKGmIFZHzCvcO_0GVQlbng7Vw_iM,14171
-duckrun/semantic_model.py,sha256=4_VgsXAHaWhqxI2kOSB2UtRLa6CoBYFEXt418j5xce0,16739
-duckrun/stats.py,sha256=CXfb2DWF3PgOckelJooU0y-BAsNT9NFDfDYEmo0mUQQ,10473
-duckrun/writer.py,sha256=svUuPCYOhrz299NgnpTKhARKjfej0PxnoND2iPDSypk,8098
-duckrun-0.2.9.dev4.dist-info/licenses/LICENSE,sha256=-DeQQwdbCbkB4507ZF3QbocysB-EIjDtaLexvqRkGZc,1083
-duckrun-0.2.9.dev4.dist-info/METADATA,sha256=iBsF-oRskhqicNpd3i5NJq0XZxTepDERJ3i_LVV0rZ4,19277
-duckrun-0.2.9.dev4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-duckrun-0.2.9.dev4.dist-info/top_level.txt,sha256=BknMEwebbUHrVAp3SC92ps8MPhK7XSYsaogTvi_DmEU,8
-duckrun-0.2.9.dev4.dist-info/RECORD,,