duckrun 0.2.9.dev5__py3-none-any.whl → 0.2.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
duckrun/core.py CHANGED
@@ -2,6 +2,8 @@ import duckdb
  import requests
  import os
  import importlib.util
+ import json
+ import time
  from deltalake import DeltaTable, write_deltalake
  from typing import List, Tuple, Union, Optional, Callable, Dict, Any
  from string import Template
@@ -702,8 +704,11 @@ class Duckrun:
          Deploy a semantic model from a BIM file using DirectLake mode.

          Args:
-             bim_url: URL to the BIM file (e.g., GitHub raw URL)
-             dataset_name: Name for the semantic model (default: lakehouse_schema)
+             bim_url: Can be:
+                 - URL: "https://raw.githubusercontent.com/.../model.bim"
+                 - Local file: "model.bim"
+                 - Workspace/Model: "workspace_name/model_name"
+             dataset_name: Name for the semantic model (default: source model name if workspace/model format, else lakehouse_schema)
              wait_seconds: Seconds to wait for permission propagation (default: 5)

          Returns:
@@ -712,18 +717,28 @@ class Duckrun:
          Examples:
              dr = Duckrun.connect("My Workspace/My Lakehouse.lakehouse/dbo")

-             # Deploy with auto-generated name
-             dr.deploy("https://raw.githubusercontent.com/.../model.bim")
+             # Deploy from workspace/model (uses same name by default)
+             dr.deploy("Source Workspace/Source Model")  # Creates "Source Model"

              # Deploy with custom name
-             dr.deploy("https://raw.githubusercontent.com/.../model.bim",
-                       dataset_name="Sales Model")
+             dr.deploy("Source Workspace/Source Model", dataset_name="Sales Model Copy")
+
+             # Deploy from URL or local file
+             dr.deploy("https://raw.githubusercontent.com/.../model.bim", dataset_name="My Model")
          """
          from .semantic_model import deploy_semantic_model

          # Auto-generate dataset name if not provided
          if dataset_name is None:
-             dataset_name = f"{self.lakehouse_name}_{self.schema}"
+             # If using workspace/model format, use the model name
+             if "/" in bim_url and not bim_url.startswith(('http://', 'https://')):
+                 parts = bim_url.split("/")
+                 if len(parts) == 2:
+                     dataset_name = parts[1]  # Use the model name
+                 else:
+                     dataset_name = f"{self.lakehouse_name}_{self.schema}"
+             else:
+                 dataset_name = f"{self.lakehouse_name}_{self.schema}"

          # Call the deployment function (DirectLake only)
          return deploy_semantic_model(
@@ -731,7 +746,7 @@ class Duckrun:
              lakehouse_name_or_id=self.lakehouse_name,
              schema_name=self.schema,
              dataset_name=dataset_name,
-             bim_url=bim_url,
+             bim_url_or_path=bim_url,
              wait_seconds=wait_seconds
          )

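The dataset-name defaulting added above is easy to exercise in isolation. A standalone sketch of the same branching (the helper name is hypothetical; the logic mirrors the diff):

```python
def default_dataset_name(bim_url: str, lakehouse_name: str, schema: str) -> str:
    # "workspace/model" (a single slash, not a URL) defaults to the model name;
    # everything else falls back to "<lakehouse>_<schema>"
    if "/" in bim_url and not bim_url.startswith(('http://', 'https://')):
        parts = bim_url.split("/")
        if len(parts) == 2:
            return parts[1]
    return f"{lakehouse_name}_{schema}"

assert default_dataset_name("Source Workspace/Source Model", "lh", "dbo") == "Source Model"
assert default_dataset_name("model.bim", "lh", "dbo") == "lh_dbo"
assert default_dataset_name("https://example.com/model.bim", "lh", "dbo") == "lh_dbo"
```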
@@ -864,6 +879,145 @@ class WorkspaceConnection:
              print(f"❌ Error creating lakehouse '{lakehouse_name}': {e}")
              return False

+     def download_bim(self, semantic_model_name: str, output_path: Optional[str] = None) -> Optional[str]:
+         """
+         Download a semantic model as a BIM (Business Intelligence Model) file.
+
+         Args:
+             semantic_model_name: Name of the semantic model to download
+             output_path: Optional path to save the BIM file. If not provided, returns the BIM content as JSON string
+
+         Returns:
+             BIM content as JSON string if output_path is None, or the file path if saved successfully
+
+         Example:
+             con = duckrun.connect("My Workspace")
+             # Get BIM content as string
+             bim_content = con.download_bim("Sales Model")
+             # Or save to file
+             con.download_bim("Sales Model", "sales_model.bim")
+         """
+         try:
+             # Get authentication token
+             from .auth import get_fabric_api_token
+             token = get_fabric_api_token()
+             if not token:
+                 print("❌ Failed to authenticate for downloading semantic model")
+                 return None
+
+             # Resolve workspace name to ID
+             workspace_id = self._get_workspace_id_by_name(token, self.workspace_name)
+             if not workspace_id:
+                 print(f"❌ Workspace '{self.workspace_name}' not found")
+                 return None
+
+             # Get semantic model ID
+             print(f"🔍 Looking for semantic model '{semantic_model_name}'...")
+             url = f"https://api.fabric.microsoft.com/v1/workspaces/{workspace_id}/semanticModels"
+             headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
+
+             response = requests.get(url, headers=headers)
+             response.raise_for_status()
+
+             models = response.json().get("value", [])
+             model = next((m for m in models if m.get("displayName") == semantic_model_name), None)
+
+             if not model:
+                 print(f"❌ Semantic model '{semantic_model_name}' not found in workspace '{self.workspace_name}'")
+                 return None
+
+             model_id = model.get("id")
+             print(f"✓ Found semantic model: {semantic_model_name} (ID: {model_id})")
+
+             # Get the model definition using the generic items API
+             print("📥 Downloading BIM definition...")
+             definition_url = f"https://api.fabric.microsoft.com/v1/workspaces/{workspace_id}/items/{model_id}/getDefinition"
+
+             # POST request to get definition with TMSL format (which includes model.bim)
+             # Note: format parameter should be in query string, not body
+             response = requests.post(f"{definition_url}?format=TMSL", headers=headers)
+             response.raise_for_status()
+
+             # Handle long-running operation if needed
+             if response.status_code == 202:
+                 operation_id = response.headers.get('x-ms-operation-id')
+                 print(f" Waiting for operation to complete...")
+
+                 max_attempts = 30
+                 for attempt in range(max_attempts):
+                     time.sleep(2)
+
+                     # Get operation result
+                     result_url = f"https://api.fabric.microsoft.com/v1/operations/{operation_id}/result"
+                     result_response = requests.get(result_url, headers=headers)
+
+                     # Check operation status
+                     status_url = f"https://api.fabric.microsoft.com/v1/operations/{operation_id}"
+                     status_response = requests.get(status_url, headers=headers)
+                     status = status_response.json().get('status')
+
+                     if status == 'Succeeded':
+                         result_data = result_response.json()
+                         break
+                     elif status == 'Failed':
+                         error = status_response.json().get('error', {})
+                         print(f"❌ Operation failed: {error.get('message')}")
+                         return None
+                     elif attempt == max_attempts - 1:
+                         print("❌ Operation timed out")
+                         return None
+             else:
+                 result_data = response.json()
+
+             # Extract BIM content from definition
+             definition = result_data.get('definition', {})
+             parts = definition.get('parts', [])
+
+             # Debug: show what parts we have
+             if not parts:
+                 print("❌ No definition parts found in response")
+                 print(f" Result data keys: {list(result_data.keys())}")
+                 print(f" Definition keys: {list(definition.keys()) if definition else 'None'}")
+                 return None
+
+             print(f" Found {len(parts)} definition parts:")
+             for part in parts:
+                 print(f" - {part.get('path', 'unknown')}")
+
+             bim_part = next((p for p in parts if p.get('path', '').endswith('.bim')), None)
+             if not bim_part:
+                 print("❌ No BIM file found in semantic model definition")
+                 print(f" Looking for files ending with '.bim', found: {[p.get('path') for p in parts]}")
+                 return None
+
+             # Decode the BIM content (it's base64 encoded)
+             import base64
+             bim_payload = bim_part.get('payload', '')
+             bim_content = base64.b64decode(bim_payload).decode('utf-8')
+             bim_json = json.loads(bim_content)
+
+             # Format as pretty JSON
+             bim_formatted = json.dumps(bim_json, indent=2)
+
+             print(f"✓ BIM file downloaded successfully")
+             print(f" - Tables: {len(bim_json.get('model', {}).get('tables', []))}")
+             print(f" - Relationships: {len(bim_json.get('model', {}).get('relationships', []))}")
+
+             # Save to file or return content
+             if output_path:
+                 with open(output_path, 'w', encoding='utf-8') as f:
+                     f.write(bim_formatted)
+                 print(f"✓ Saved to: {output_path}")
+                 return output_path
+             else:
+                 return bim_formatted
+
+         except Exception as e:
+             print(f"❌ Error downloading semantic model: {e}")
+             import traceback
+             traceback.print_exc()
+             return None
+
      def _get_workspace_id_by_name(self, token: str, workspace_name: str) -> Optional[str]:
          """Helper method to get workspace ID from name"""
          try:
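Taken together, the core.py changes let a workspace connection export a model and a lakehouse connection redeploy it. A minimal sketch based on the docstrings above (workspace, lakehouse, and model names are placeholders):

```python
import duckrun

# A workspace-level connection exposes download_bim
con = duckrun.connect("My Workspace")

# Save the model definition locally; returns the path, or None on failure
path = con.download_bim("Sales Model", "sales_model.bim")

# A lakehouse-level connection exposes deploy; the source may now be a URL,
# a local .bim file, or "workspace/model"
dr = duckrun.connect("My Workspace/My Lakehouse.lakehouse/dbo")
if path:
    dr.deploy(path, dataset_name="Sales Model Copy")
```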
duckrun/semantic_model.py CHANGED
@@ -186,13 +186,107 @@ def refresh_dataset(dataset_name, workspace_id, client, dataset_id=None):
      response.raise_for_status()


- def download_bim_from_github(url):
-     """Download BIM file from URL"""
-     print(f"Downloading BIM file...")
-     response = requests.get(url)
-     response.raise_for_status()
-     bim_content = response.json()
-     print(f"✓ BIM file downloaded")
+ def download_bim_from_github(url_or_path):
+     """
+     Load BIM file from URL, local file path, or workspace/model format.
+
+     Args:
+         url_or_path: Can be:
+             - Local file path: "model.bim"
+             - URL: "https://..."
+             - Workspace/Model: "workspace_name/semantic_model_name"
+
+     Returns:
+         BIM content as dictionary
+     """
+     import os
+     import tempfile
+
+     # Check if it's a local file path
+     if os.path.exists(url_or_path):
+         print(f"Loading BIM file from local path...")
+         with open(url_or_path, 'r', encoding='utf-8') as f:
+             bim_content = json.load(f)
+         print(f"✓ BIM file loaded from: {url_or_path}")
+     # Check if it's a URL
+     elif url_or_path.startswith(('http://', 'https://')):
+         print(f"Downloading BIM file from URL...")
+         response = requests.get(url_or_path)
+         response.raise_for_status()
+         bim_content = response.json()
+         print(f"✓ BIM file downloaded from URL")
+     # Check if it's workspace/model format
+     elif "/" in url_or_path and not os.path.exists(url_or_path):
+         print(f"Downloading BIM from workspace/model...")
+         parts = url_or_path.split("/")
+         if len(parts) != 2:
+             raise ValueError(f"Invalid workspace/model format: '{url_or_path}'. Expected: 'workspace_name/model_name'")
+
+         ws_name, model_name = parts
+
+         # Download BIM from the semantic model
+         client = FabricRestClient()
+         ws_id = get_workspace_id(ws_name, client)
+
+         # Get semantic model ID
+         response = client.get(f"/v1/workspaces/{ws_id}/semanticModels")
+         models = response.json().get('value', [])
+         model = next((m for m in models if m.get('displayName') == model_name), None)
+
+         if not model:
+             raise ValueError(f"Semantic model '{model_name}' not found in workspace '{ws_name}'")
+
+         model_id = model.get('id')
+
+         # Get definition using Items API with TMSL format
+         definition_url = f"https://api.fabric.microsoft.com/v1/workspaces/{ws_id}/items/{model_id}/getDefinition"
+         headers = client._get_headers()
+         response = requests.post(f"{definition_url}?format=TMSL", headers=headers)
+         response.raise_for_status()
+
+         # Handle long-running operation
+         if response.status_code == 202:
+             operation_id = response.headers.get('x-ms-operation-id')
+             max_attempts = 30
+
+             for attempt in range(max_attempts):
+                 time.sleep(2)
+
+                 status_url = f"https://api.fabric.microsoft.com/v1/operations/{operation_id}"
+                 status_response = requests.get(status_url, headers=headers)
+                 status = status_response.json().get('status')
+
+                 if status == 'Succeeded':
+                     result_url = f"https://api.fabric.microsoft.com/v1/operations/{operation_id}/result"
+                     result_response = requests.get(result_url, headers=headers)
+                     result_data = result_response.json()
+                     break
+                 elif status == 'Failed':
+                     error = status_response.json().get('error', {})
+                     raise Exception(f"Download operation failed: {error.get('message')}")
+                 elif attempt == max_attempts - 1:
+                     raise Exception("Download operation timed out")
+         else:
+             result_data = response.json()
+
+         # Extract BIM content
+         definition = result_data.get('definition', {})
+         parts = definition.get('parts', [])
+
+         bim_part = next((p for p in parts if p.get('path', '').endswith('.bim')), None)
+         if not bim_part:
+             raise Exception("No BIM file found in semantic model definition")
+
+         # Decode BIM
+         import base64
+         bim_payload = bim_part.get('payload', '')
+         bim_content_str = base64.b64decode(bim_payload).decode('utf-8')
+         bim_content = json.loads(bim_content_str)
+
+         print(f"✓ BIM downloaded from {ws_name}/{model_name}")
+     else:
+         raise ValueError(f"Invalid BIM source: '{url_or_path}'. Must be a valid file path, URL, or 'workspace/model' format.")
+
      print(f" - Tables: {len(bim_content.get('model', {}).get('tables', []))}")
      print(f" - Relationships: {len(bim_content.get('model', {}).get('relationships', []))}")
      return bim_content
@@ -299,23 +393,27 @@ def create_dataset_from_bim(dataset_name, bim_content, workspace_id, client):
        for attempt in range(max_attempts):
            time.sleep(2)

-           # Get operation result (not just status)
-           result_response = client.get(f"/v1/operations/{operation_id}/result")
-
            # Check if operation is complete by getting the status
            status_response = client.get(f"/v1/operations/{operation_id}")
            status = status_response.json().get('status')

            if status == 'Succeeded':
                print(f"✓ Operation completed")
-               # Return the created dataset ID from the result
-               result_data = result_response.json()
-               dataset_id = result_data.get('id')
-               if dataset_id:
-                   return dataset_id
-               else:
-                   # Fallback: search for the dataset by name
-                   return get_dataset_id(dataset_name, workspace_id, client)
+
+               # Now get the result (only after status is Succeeded)
+               try:
+                   result_response = client.get(f"/v1/operations/{operation_id}/result")
+                   result_data = result_response.json()
+                   dataset_id = result_data.get('id')
+                   if dataset_id:
+                       return dataset_id
+               except:
+                   # If result endpoint fails, fallback to searching by name
+                   pass
+
+               # Fallback: search for the dataset by name
+               return get_dataset_id(dataset_name, workspace_id, client)
+
            elif status == 'Failed':
                error = status_response.json().get('error', {})
                raise Exception(f"Operation failed: {error.get('message')}")
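Both files in this release poll Fabric long-running operations the same way: sleep, check `/operations/{id}`, and fetch `/operations/{id}/result` only once the status is Succeeded. A reduced sketch of that shared pattern (the helper name and defaults are illustrative; the endpoints are the ones used in the diff):

```python
import time
import requests

FABRIC_API = "https://api.fabric.microsoft.com/v1"

def poll_operation(operation_id, headers, max_attempts=30, interval=2):
    """Poll a Fabric long-running operation; return its result payload."""
    for attempt in range(max_attempts):
        time.sleep(interval)
        status_response = requests.get(f"{FABRIC_API}/operations/{operation_id}", headers=headers)
        status = status_response.json().get('status')
        if status == 'Succeeded':
            # Fetch the result only after the operation reports success
            result = requests.get(f"{FABRIC_API}/operations/{operation_id}/result", headers=headers)
            return result.json()
        if status == 'Failed':
            error = status_response.json().get('error', {})
            raise Exception(f"Operation failed: {error.get('message')}")
    raise Exception("Operation timed out")
```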
@@ -333,7 +431,7 @@ def create_dataset_from_bim(dataset_name, bim_content, workspace_id, client):


  def deploy_semantic_model(workspace_name_or_id, lakehouse_name_or_id, schema_name, dataset_name,
-                           bim_url, wait_seconds=5):
+                           bim_url_or_path, wait_seconds=5):
      """
      Deploy a semantic model using DirectLake mode.

@@ -342,15 +440,20 @@ def deploy_semantic_model(workspace_name_or_id, lakehouse_name_or_id, schema_nam
          lakehouse_name_or_id: Name or GUID of the lakehouse
          schema_name: Schema name (e.g., 'dbo', 'staging')
          dataset_name: Name for the semantic model
-         bim_url: URL to the BIM file
+         bim_url_or_path: URL to the BIM file or local file path (e.g., 'model.bim' or 'https://...')
          wait_seconds: Seconds to wait before refresh (default: 5)

      Returns:
          1 for success, 0 for failure

      Examples:
+         # Using a URL
          dr = Duckrun.connect("My Workspace/My Lakehouse.lakehouse/dbo")
          dr.deploy("https://raw.githubusercontent.com/.../model.bim")
+
+         # Using a local file
+         dr.deploy("./my_model.bim")
+         dr.deploy("C:/path/to/model.bim")
      """
      print("=" * 70)
      print("Semantic Model Deployment (DirectLake)")
@@ -389,8 +492,8 @@ def deploy_semantic_model(workspace_name_or_id, lakehouse_name_or_id, schema_nam
      lakehouse_id = get_lakehouse_id(lakehouse_name_or_id, workspace_id, client)

      # Step 4: Download and update BIM
-     print("\n[Step 4/6] Downloading and configuring BIM file...")
-     bim_content = download_bim_from_github(bim_url)
+     print("\n[Step 4/6] Loading and configuring BIM file...")
+     bim_content = download_bim_from_github(bim_url_or_path)

      modified_bim = update_bim_for_directlake(bim_content, workspace_id, lakehouse_id, schema_name)
      modified_bim['name'] = dataset_name
@@ -432,3 +535,187 @@ def deploy_semantic_model(workspace_name_or_id, lakehouse_name_or_id, schema_nam
      print(f" - Check tables are in Delta format")
      print("=" * 70)
      return 0
+
+
+ def copy_model(ws_source, model_name, destination, new_model_name=None, wait_seconds=5):
+     """
+     Copy a semantic model from one workspace to another.
+
+     This is a convenience function that downloads a BIM file from a source workspace
+     and deploys it to a destination lakehouse in one operation.
+
+     Args:
+         ws_source: Source workspace name or GUID
+         model_name: Name of the semantic model to copy
+         destination: Destination in format "workspace/lakehouse.lakehouse/schema"
+         new_model_name: Name for the new semantic model (default: same as source)
+         wait_seconds: Seconds to wait before refresh (default: 5)
+
+     Returns:
+         1 for success, 0 for failure
+
+     Examples:
+         # Copy to same workspace, different lakehouse
+         copy_model("My Workspace", "Sales Model", "My Workspace/Target Lakehouse.lakehouse/dbo")
+
+         # Copy to different workspace with new name
+         copy_model("Source WS", "Production Model", "Target WS/Data Lake.lakehouse/analytics",
+                    new_model_name="Production Model - Copy")
+
+         # Using the connect pattern
+         import duckrun
+         duckrun.semantic_model.copy_model("Source", "Model", "Target/LH.lakehouse/dbo")
+     """
+     import tempfile
+     import os
+
+     print("=" * 70)
+     print("Semantic Model Copy Operation")
+     print("=" * 70)
+
+     try:
+         # Parse destination
+         parts = destination.split("/")
+         if len(parts) != 3:
+             raise ValueError(
+                 f"Invalid destination format: '{destination}'. "
+                 "Expected format: 'workspace/lakehouse.lakehouse/schema'"
+             )
+
+         ws_dest, lakehouse, schema = parts
+
+         # Remove .lakehouse suffix if present
+         if lakehouse.endswith(".lakehouse"):
+             lakehouse = lakehouse[:-10]
+
+         # Use source model name if new name not provided
+         if not new_model_name:
+             new_model_name = model_name
+
+         print(f"\nSource:")
+         print(f" Workspace: {ws_source}")
+         print(f" Model: {model_name}")
+         print(f"\nDestination:")
+         print(f" Workspace: {ws_dest}")
+         print(f" Lakehouse: {lakehouse}")
+         print(f" Schema: {schema}")
+         print(f" New Model Name: {new_model_name}")
+
+         # Step 1: Download BIM from source
+         print("\n" + "-" * 70)
+         print("[Step 1/2] Downloading BIM from source workspace...")
+         print("-" * 70)
+
+         client = FabricRestClient()
+         ws_source_id = get_workspace_id(ws_source, client)
+
+         # Use temporary file for BIM content
+         with tempfile.NamedTemporaryFile(mode='w', suffix='.bim', delete=False, encoding='utf-8') as tmp_file:
+             temp_bim_path = tmp_file.name
+
+             # Get semantic model ID
+             response = client.get(f"/v1/workspaces/{ws_source_id}/semanticModels")
+             models = response.json().get('value', [])
+             model = next((m for m in models if m.get('displayName') == model_name), None)
+
+             if not model:
+                 raise ValueError(f"Semantic model '{model_name}' not found in workspace '{ws_source}'")
+
+             model_id = model.get('id')
+             print(f"✓ Found source model: {model_name} (ID: {model_id})")
+
+             # Get definition using Items API with TMSL format
+             print(" Downloading BIM definition...")
+             definition_url = f"https://api.fabric.microsoft.com/v1/workspaces/{ws_source_id}/items/{model_id}/getDefinition"
+             headers = client._get_headers()
+             response = requests.post(f"{definition_url}?format=TMSL", headers=headers)
+             response.raise_for_status()
+
+             # Handle long-running operation
+             if response.status_code == 202:
+                 operation_id = response.headers.get('x-ms-operation-id')
+                 max_attempts = 30
+
+                 for attempt in range(max_attempts):
+                     time.sleep(2)
+
+                     status_url = f"https://api.fabric.microsoft.com/v1/operations/{operation_id}"
+                     status_response = requests.get(status_url, headers=headers)
+                     status = status_response.json().get('status')
+
+                     if status == 'Succeeded':
+                         result_url = f"https://api.fabric.microsoft.com/v1/operations/{operation_id}/result"
+                         result_response = requests.get(result_url, headers=headers)
+                         result_data = result_response.json()
+                         break
+                     elif status == 'Failed':
+                         error = status_response.json().get('error', {})
+                         raise Exception(f"Download operation failed: {error.get('message')}")
+                     elif attempt == max_attempts - 1:
+                         raise Exception("Download operation timed out")
+             else:
+                 result_data = response.json()
+
+             # Extract BIM content
+             definition = result_data.get('definition', {})
+             parts = definition.get('parts', [])
+
+             bim_part = next((p for p in parts if p.get('path', '').endswith('.bim')), None)
+             if not bim_part:
+                 raise Exception("No BIM file found in semantic model definition")
+
+             # Decode and save BIM
+             import base64
+             bim_payload = bim_part.get('payload', '')
+             bim_content = base64.b64decode(bim_payload).decode('utf-8')
+             bim_json = json.loads(bim_content)
+
+             # Write to temp file
+             json.dump(bim_json, tmp_file, indent=2)
+
+         print(f"✓ BIM downloaded successfully")
+         print(f" - Tables: {len(bim_json.get('model', {}).get('tables', []))}")
+         print(f" - Relationships: {len(bim_json.get('model', {}).get('relationships', []))}")
+
+         # Step 2: Deploy to destination
+         print("\n" + "-" * 70)
+         print("[Step 2/2] Deploying to destination workspace...")
+         print("-" * 70)
+
+         result = deploy_semantic_model(
+             workspace_name_or_id=ws_dest,
+             lakehouse_name_or_id=lakehouse,
+             schema_name=schema,
+             dataset_name=new_model_name,
+             bim_url_or_path=temp_bim_path,
+             wait_seconds=wait_seconds
+         )
+
+         # Clean up temp file
+         try:
+             os.unlink(temp_bim_path)
+         except:
+             pass
+
+         if result == 1:
+             print("\n" + "=" * 70)
+             print("🎉 Copy Operation Completed!")
+             print("=" * 70)
+             print(f"Source: {ws_source}/{model_name}")
+             print(f"Destination: {ws_dest}/{lakehouse}/{schema}/{new_model_name}")
+             print("=" * 70)
+
+         return result
+
+     except Exception as e:
+         print("\n" + "=" * 70)
+         print("❌ Copy Operation Failed")
+         print("=" * 70)
+         print(f"Error: {str(e)}")
+         print("\n💡 Troubleshooting:")
+         print(f" - Verify source workspace '{ws_source}' and model '{model_name}' exist")
+         print(f" - Verify destination workspace and lakehouse exist")
+         print(f" - Ensure you have permissions for both workspaces")
+         print("=" * 70)
+         return 0
+
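The new `copy_model` helper chains the download and deploy steps above into one call. A short usage sketch following its docstring (workspace and model names are placeholders):

```python
from duckrun.semantic_model import copy_model

# Copy a model into another workspace's lakehouse under a new name.
# Returns 1 on success, 0 on failure, matching deploy_semantic_model.
result = copy_model(
    ws_source="Source WS",
    model_name="Sales Model",
    destination="Target WS/Data Lake.lakehouse/dbo",
    new_model_name="Sales Model - Copy",
)
```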
duckrun/stats.py CHANGED
@@ -4,6 +4,7 @@ Delta Lake table statistics functionality for duckrun
  import duckdb
  from deltalake import DeltaTable
  from datetime import datetime
+ import pyarrow as pa


  def _table_exists(duckrun_instance, schema_name: str, table_name: str) -> bool:
@@ -149,17 +150,23 @@ def get_stats(duckrun_instance, source: str):
          dt = DeltaTable(table_path)
          add_actions = dt.get_add_actions(flatten=True)

-         # Convert to dict - compatible with both old and new deltalake versions
-         # Try to_pydict() first (old versions), fall back to to_pylist() (new versions)
+         # Convert RecordBatch to dict - works with both PyArrow (deltalake 0.18.2) and arro3 (newer versions)
+         # Strategy: Use duck typing - try direct conversion first, then manual extraction
+         # This works because both PyArrow and arro3 RecordBatches have schema and column() methods
+
          try:
+             # Old deltalake (0.18.2): PyArrow RecordBatch has to_pydict() directly
              xx = add_actions.to_pydict()
          except AttributeError:
-             # New version with arro3: use to_pylist() and convert to dict of lists
-             records = add_actions.to_pylist()
-             if records:
-                 # Convert list of dicts to dict of lists
-                 xx = {key: [record[key] for record in records] for key in records[0].keys()}
+             # New deltalake with arro3: Use schema and column() methods
+             # This is the universal approach that works with both PyArrow and arro3
+             if hasattr(add_actions, 'schema') and hasattr(add_actions, 'column'):
+                 # Extract columns manually and create PyArrow table
+                 arrow_table = pa.table({name: add_actions.column(name) for name in add_actions.schema.names})
+                 xx = arrow_table.to_pydict()
              else:
+                 # Fallback: empty dict (shouldn't happen)
+                 print(f"Warning: Could not convert RecordBatch for table '{tbl}': Unexpected type {type(add_actions)}")
                  xx = {}

          # Check if VORDER exists
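The stats.py change replaces the `to_pylist()` fallback with a schema/column rebuild, so one code path serves both PyArrow and arro3 batches. The conversion in isolation (the helper name is illustrative; the demo exercises only the PyArrow path):

```python
import pyarrow as pa

def record_batch_to_pydict(batch):
    """Convert a PyArrow or arro3 RecordBatch to a dict of Python lists."""
    try:
        # PyArrow RecordBatch (older deltalake) converts directly
        return batch.to_pydict()
    except AttributeError:
        # arro3 batches lack to_pydict(); rebuild through schema/column(),
        # which both implementations expose
        if hasattr(batch, 'schema') and hasattr(batch, 'column'):
            return pa.table({name: batch.column(name) for name in batch.schema.names}).to_pydict()
        return {}

batch = pa.RecordBatch.from_pydict({'path': ['a', 'b'], 'size_bytes': [10, 20]})
print(record_batch_to_pydict(batch))  # {'path': ['a', 'b'], 'size_bytes': [10, 20]}
```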
{duckrun-0.2.9.dev5.dist-info → duckrun-0.2.10.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: duckrun
- Version: 0.2.9.dev5
+ Version: 0.2.10
  Summary: Lakehouse task runner powered by DuckDB for Microsoft Fabric
  Author: mim
  License: MIT
@@ -420,6 +420,37 @@ success = con.run(pipeline) # Returns True only if ALL tasks succeed

  This prevents downstream tasks from processing incomplete or corrupted data.

+ ### Semantic Model Deployment
+
+ Deploy Power BI semantic models directly from BIM files using DirectLake mode:
+
+ ```python
+ # Connect to lakehouse
+ con = duckrun.connect("Analytics/Sales.lakehouse/dbo")
+
+ # Deploy with auto-generated name (lakehouse_schema)
+ con.deploy("https://raw.githubusercontent.com/user/repo/main/model.bim")
+
+ # Deploy with custom name
+ con.deploy(
+     "https://raw.githubusercontent.com/user/repo/main/sales_model.bim",
+     dataset_name="Sales Analytics Model",
+     wait_seconds=10  # Wait for permission propagation
+ )
+ ```
+
+ **Features:**
+ - 🚀 **DirectLake Mode**: Deploys semantic models with DirectLake connection
+ - 🔄 **Automatic Configuration**: Auto-configures workspace, lakehouse, and schema connections
+ - 📦 **BIM from URL**: Load model definitions from GitHub or any accessible URL
+ - ⏱️ **Permission Handling**: Configurable wait time for permission propagation
+
+ **Use Cases:**
+ - Deploy semantic models as part of CI/CD pipelines
+ - Version control your semantic models in Git
+ - Automated model deployment across environments
+ - Streamline DirectLake model creation
+
  ### Delta Lake Optimization

  Duckrun automatically:
@@ -534,6 +565,12 @@ con.sql("""

  # 5. Download processed files for external systems
  con.download("processed_reports", "./exports", ['.csv'])
+
+ # 6. Deploy semantic model for Power BI
+ con.deploy(
+     "https://raw.githubusercontent.com/user/repo/main/sales_model.bim",
+     dataset_name="Sales Analytics"
+ )
  ```

@@ -541,8 +578,9 @@ con.download("processed_reports", "./exports", ['.csv'])
  - 🔄 **Pipeline orchestration** with SQL and Python tasks
  - ⚡ **Fast data exploration** with DuckDB
  - 💾 **Delta table creation** with Spark-style API
- - **Schema evolution** and partitioning
- - �📤 **File downloads** from OneLake Files
+ - 🔀 **Schema evolution** and partitioning
+ - 📤 **File downloads** from OneLake Files
+ - 📊 **Semantic model deployment** with DirectLake

  ## Schema Evolution & Partitioning Guide

duckrun-0.2.10.dist-info/RECORD ADDED
@@ -0,0 +1,14 @@
+ duckrun/__init__.py,sha256=cTj6KQ6hKmgu1z7k9nhDcO5lct049luxjx1V0QnymCo,235
+ duckrun/auth.py,sha256=qPaLQ7InlV9leA9r6E6VEeYavFFoBi0zSN8m_l1aoQs,9545
+ duckrun/core.py,sha256=g9WtvhROxFSo2Idb979fY5HhxbMm_x-tajc_zWMtqCU,46853
+ duckrun/files.py,sha256=Fvdjg3DyHJzIVzKo8M_j-eGz4zU61lOB38Y_onbQJkI,10137
+ duckrun/lakehouse.py,sha256=j--Z3zo8AOWt1GF9VzRosmmTAy6ey2D0LVubti58twU,14109
+ duckrun/runner.py,sha256=yrDxfy1RVkb8iK9GKGmIFZHzCvcO_0GVQlbng7Vw_iM,14171
+ duckrun/semantic_model.py,sha256=obzlN2-dbEW3JmDop-vrZGGGLi9u3ThhTbgtDjou7uY,29509
+ duckrun/stats.py,sha256=oKIjZ7u5cFVT63FuOl5UqoDsOG3098woSCn-uI6i_sQ,11084
+ duckrun/writer.py,sha256=svUuPCYOhrz299NgnpTKhARKjfej0PxnoND2iPDSypk,8098
+ duckrun-0.2.10.dist-info/licenses/LICENSE,sha256=-DeQQwdbCbkB4507ZF3QbocysB-EIjDtaLexvqRkGZc,1083
+ duckrun-0.2.10.dist-info/METADATA,sha256=CwDyjJqyfBoISxZ1bfdojVVsP0HcrLylgqCTpMsC6e8,20624
+ duckrun-0.2.10.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ duckrun-0.2.10.dist-info/top_level.txt,sha256=BknMEwebbUHrVAp3SC92ps8MPhK7XSYsaogTvi_DmEU,8
+ duckrun-0.2.10.dist-info/RECORD,,
duckrun-0.2.9.dev5.dist-info/RECORD REMOVED
@@ -1,14 +0,0 @@
- duckrun/__init__.py,sha256=cTj6KQ6hKmgu1z7k9nhDcO5lct049luxjx1V0QnymCo,235
- duckrun/auth.py,sha256=qPaLQ7InlV9leA9r6E6VEeYavFFoBi0zSN8m_l1aoQs,9545
- duckrun/core.py,sha256=CrWMgA1QHvVF2AAlTlBlQ7VfKsuakcqZa4VuX2WJmik,39279
- duckrun/files.py,sha256=Fvdjg3DyHJzIVzKo8M_j-eGz4zU61lOB38Y_onbQJkI,10137
- duckrun/lakehouse.py,sha256=j--Z3zo8AOWt1GF9VzRosmmTAy6ey2D0LVubti58twU,14109
- duckrun/runner.py,sha256=yrDxfy1RVkb8iK9GKGmIFZHzCvcO_0GVQlbng7Vw_iM,14171
- duckrun/semantic_model.py,sha256=jmTrS15WmhU3rQfdpLII1wm3EORdQfqQxOhqOSyXB_w,17305
- duckrun/stats.py,sha256=CXfb2DWF3PgOckelJooU0y-BAsNT9NFDfDYEmo0mUQQ,10473
- duckrun/writer.py,sha256=svUuPCYOhrz299NgnpTKhARKjfej0PxnoND2iPDSypk,8098
- duckrun-0.2.9.dev5.dist-info/licenses/LICENSE,sha256=-DeQQwdbCbkB4507ZF3QbocysB-EIjDtaLexvqRkGZc,1083
- duckrun-0.2.9.dev5.dist-info/METADATA,sha256=r0oG0-EI5oE-UgfRWJskXHHc4u7uNhFrdPFsTrCtGwc,19277
- duckrun-0.2.9.dev5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- duckrun-0.2.9.dev5.dist-info/top_level.txt,sha256=BknMEwebbUHrVAp3SC92ps8MPhK7XSYsaogTvi_DmEU,8
- duckrun-0.2.9.dev5.dist-info/RECORD,,