duckrun 0.2.9.dev4__tar.gz → 0.2.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: duckrun
- Version: 0.2.9.dev4
+ Version: 0.2.10
  Summary: Lakehouse task runner powered by DuckDB for Microsoft Fabric
  Author: mim
  License: MIT
@@ -420,6 +420,37 @@ success = con.run(pipeline) # Returns True only if ALL tasks succeed
  
  This prevents downstream tasks from processing incomplete or corrupted data.
  
+ ### Semantic Model Deployment
+ 
+ Deploy Power BI semantic models directly from BIM files using DirectLake mode:
+ 
+ ```python
+ # Connect to lakehouse
+ con = duckrun.connect("Analytics/Sales.lakehouse/dbo")
+ 
+ # Deploy with auto-generated name (lakehouse_schema)
+ con.deploy("https://raw.githubusercontent.com/user/repo/main/model.bim")
+ 
+ # Deploy with custom name
+ con.deploy(
+     "https://raw.githubusercontent.com/user/repo/main/sales_model.bim",
+     dataset_name="Sales Analytics Model",
+     wait_seconds=10 # Wait for permission propagation
+ )
+ ```
+ 
+ **Features:**
+ - 🚀 **DirectLake Mode**: Deploys semantic models with DirectLake connection
+ - 🔄 **Automatic Configuration**: Auto-configures workspace, lakehouse, and schema connections
+ - 📦 **BIM from URL**: Load model definitions from GitHub or any accessible URL
+ - ⏱️ **Permission Handling**: Configurable wait time for permission propagation
+ 
+ **Use Cases:**
+ - Deploy semantic models as part of CI/CD pipelines
+ - Version control your semantic models in Git
+ - Automated model deployment across environments
+ - Streamline DirectLake model creation
+ 
  ### Delta Lake Optimization
  
  Duckrun automatically:
@@ -534,6 +565,12 @@ con.sql("""
  
  # 5. Download processed files for external systems
  con.download("processed_reports", "./exports", ['.csv'])
+ 
+ # 6. Deploy semantic model for Power BI
+ con.deploy(
+     "https://raw.githubusercontent.com/user/repo/main/sales_model.bim",
+     dataset_name="Sales Analytics"
+ )
  ```
  
  **This example demonstrates:**
@@ -541,8 +578,9 @@ con.download("processed_reports", "./exports", ['.csv'])
  - 🔄 **Pipeline orchestration** with SQL and Python tasks
  - ⚡ **Fast data exploration** with DuckDB
  - 💾 **Delta table creation** with Spark-style API
- - **Schema evolution** and partitioning
- - �📤 **File downloads** from OneLake Files
+ - 🔀 **Schema evolution** and partitioning
+ - 📤 **File downloads** from OneLake Files
+ - 📊 **Semantic model deployment** with DirectLake
  
  ## Schema Evolution & Partitioning Guide
  
@@ -400,6 +400,37 @@ success = con.run(pipeline) # Returns True only if ALL tasks succeed
  
  This prevents downstream tasks from processing incomplete or corrupted data.
  
+ ### Semantic Model Deployment
+ 
+ Deploy Power BI semantic models directly from BIM files using DirectLake mode:
+ 
+ ```python
+ # Connect to lakehouse
+ con = duckrun.connect("Analytics/Sales.lakehouse/dbo")
+ 
+ # Deploy with auto-generated name (lakehouse_schema)
+ con.deploy("https://raw.githubusercontent.com/user/repo/main/model.bim")
+ 
+ # Deploy with custom name
+ con.deploy(
+     "https://raw.githubusercontent.com/user/repo/main/sales_model.bim",
+     dataset_name="Sales Analytics Model",
+     wait_seconds=10 # Wait for permission propagation
+ )
+ ```
+ 
+ **Features:**
+ - 🚀 **DirectLake Mode**: Deploys semantic models with DirectLake connection
+ - 🔄 **Automatic Configuration**: Auto-configures workspace, lakehouse, and schema connections
+ - 📦 **BIM from URL**: Load model definitions from GitHub or any accessible URL
+ - ⏱️ **Permission Handling**: Configurable wait time for permission propagation
+ 
+ **Use Cases:**
+ - Deploy semantic models as part of CI/CD pipelines
+ - Version control your semantic models in Git
+ - Automated model deployment across environments
+ - Streamline DirectLake model creation
+ 
  ### Delta Lake Optimization
  
  Duckrun automatically:
@@ -514,6 +545,12 @@ con.sql("""
  
  # 5. Download processed files for external systems
  con.download("processed_reports", "./exports", ['.csv'])
+ 
+ # 6. Deploy semantic model for Power BI
+ con.deploy(
+     "https://raw.githubusercontent.com/user/repo/main/sales_model.bim",
+     dataset_name="Sales Analytics"
+ )
  ```
  
  **This example demonstrates:**
@@ -521,8 +558,9 @@ con.download("processed_reports", "./exports", ['.csv'])
  - 🔄 **Pipeline orchestration** with SQL and Python tasks
  - ⚡ **Fast data exploration** with DuckDB
  - 💾 **Delta table creation** with Spark-style API
- - **Schema evolution** and partitioning
- - �📤 **File downloads** from OneLake Files
+ - 🔀 **Schema evolution** and partitioning
+ - 📤 **File downloads** from OneLake Files
+ - 📊 **Semantic model deployment** with DirectLake
  
  ## Schema Evolution & Partitioning Guide
  
@@ -2,7 +2,7 @@
  
  from duckrun.core import Duckrun
  
- __version__ = "0.2.9.dev4"
+ __version__ = "0.2.9.dev5"
  
  # Expose unified connect method at module level
  connect = Duckrun.connect
@@ -2,6 +2,8 @@ import duckdb
  import requests
  import os
  import importlib.util
+ import json
+ import time
  from deltalake import DeltaTable, write_deltalake
  from typing import List, Tuple, Union, Optional, Callable, Dict, Any
  from string import Template
@@ -702,8 +704,11 @@ class Duckrun:
          Deploy a semantic model from a BIM file using DirectLake mode.
  
          Args:
-             bim_url: URL to the BIM file (e.g., GitHub raw URL)
-             dataset_name: Name for the semantic model (default: lakehouse_schema)
+             bim_url: Can be:
+                 - URL: "https://raw.githubusercontent.com/.../model.bim"
+                 - Local file: "model.bim"
+                 - Workspace/Model: "workspace_name/model_name"
+             dataset_name: Name for the semantic model (default: source model name if workspace/model format, else lakehouse_schema)
              wait_seconds: Seconds to wait for permission propagation (default: 5)
  
          Returns:
@@ -712,18 +717,28 @@ class Duckrun:
          Examples:
              dr = Duckrun.connect("My Workspace/My Lakehouse.lakehouse/dbo")
  
-             # Deploy with auto-generated name
-             dr.deploy("https://raw.githubusercontent.com/.../model.bim")
+             # Deploy from workspace/model (uses same name by default)
+             dr.deploy("Source Workspace/Source Model") # Creates "Source Model"
  
              # Deploy with custom name
-             dr.deploy("https://raw.githubusercontent.com/.../model.bim",
-                       dataset_name="Sales Model")
+             dr.deploy("Source Workspace/Source Model", dataset_name="Sales Model Copy")
+ 
+             # Deploy from URL or local file
+             dr.deploy("https://raw.githubusercontent.com/.../model.bim", dataset_name="My Model")
          """
          from .semantic_model import deploy_semantic_model
  
          # Auto-generate dataset name if not provided
          if dataset_name is None:
-             dataset_name = f"{self.lakehouse_name}_{self.schema}"
+             # If using workspace/model format, use the model name
+             if "/" in bim_url and not bim_url.startswith(('http://', 'https://')):
+                 parts = bim_url.split("/")
+                 if len(parts) == 2:
+                     dataset_name = parts[1] # Use the model name
+                 else:
+                     dataset_name = f"{self.lakehouse_name}_{self.schema}"
+             else:
+                 dataset_name = f"{self.lakehouse_name}_{self.schema}"
  
          # Call the deployment function (DirectLake only)
          return deploy_semantic_model(
@@ -731,7 +746,7 @@ class Duckrun:
              lakehouse_name_or_id=self.lakehouse_name,
              schema_name=self.schema,
              dataset_name=dataset_name,
-             bim_url=bim_url,
+             bim_url_or_path=bim_url,
              wait_seconds=wait_seconds
          )
  
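The default-name rule added above is compact but easy to misread, so here is a standalone sketch of just that logic (illustrative code distilled from the diffed hunk, not a function in the package; the lakehouse and schema values are made up):

```python
# Sketch of deploy()'s default dataset_name resolution in this release.
# Mirrors the diffed branch above; default_dataset_name is a hypothetical helper.
def default_dataset_name(bim_url: str, lakehouse_name: str, schema: str) -> str:
    # "workspace/model" shorthand: contains a slash but no URL scheme
    if "/" in bim_url and not bim_url.startswith(('http://', 'https://')):
        parts = bim_url.split("/")
        if len(parts) == 2:
            return parts[1]  # reuse the source model's name
    # URLs, local files, and anything else fall back to lakehouse_schema
    return f"{lakehouse_name}_{schema}"

assert default_dataset_name("Source Workspace/Source Model", "Sales", "dbo") == "Source Model"
assert default_dataset_name("model.bim", "Sales", "dbo") == "Sales_dbo"
assert default_dataset_name("https://raw.githubusercontent.com/u/r/main/model.bim",
                            "Sales", "dbo") == "Sales_dbo"
```

Note that a relative path containing exactly one slash (e.g. "models/model.bim") would also match the workspace/model branch, since the check only inspects slashes and URL schemes.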
@@ -864,6 +879,145 @@ class WorkspaceConnection:
              print(f"❌ Error creating lakehouse '{lakehouse_name}': {e}")
              return False
  
+     def download_bim(self, semantic_model_name: str, output_path: Optional[str] = None) -> Optional[str]:
+         """
+         Download a semantic model as a BIM (Business Intelligence Model) file.
+ 
+         Args:
+             semantic_model_name: Name of the semantic model to download
+             output_path: Optional path to save the BIM file. If not provided, returns the BIM content as JSON string
+ 
+         Returns:
+             BIM content as JSON string if output_path is None, or the file path if saved successfully
+ 
+         Example:
+             con = duckrun.connect("My Workspace")
+             # Get BIM content as string
+             bim_content = con.download_bim("Sales Model")
+             # Or save to file
+             con.download_bim("Sales Model", "sales_model.bim")
+         """
+         try:
+             # Get authentication token
+             from .auth import get_fabric_api_token
+             token = get_fabric_api_token()
+             if not token:
+                 print("❌ Failed to authenticate for downloading semantic model")
+                 return None
+ 
+             # Resolve workspace name to ID
+             workspace_id = self._get_workspace_id_by_name(token, self.workspace_name)
+             if not workspace_id:
+                 print(f"❌ Workspace '{self.workspace_name}' not found")
+                 return None
+ 
+             # Get semantic model ID
+             print(f"🔍 Looking for semantic model '{semantic_model_name}'...")
+             url = f"https://api.fabric.microsoft.com/v1/workspaces/{workspace_id}/semanticModels"
+             headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
+ 
+             response = requests.get(url, headers=headers)
+             response.raise_for_status()
+ 
+             models = response.json().get("value", [])
+             model = next((m for m in models if m.get("displayName") == semantic_model_name), None)
+ 
+             if not model:
+                 print(f"❌ Semantic model '{semantic_model_name}' not found in workspace '{self.workspace_name}'")
+                 return None
+ 
+             model_id = model.get("id")
+             print(f"✓ Found semantic model: {semantic_model_name} (ID: {model_id})")
+ 
+             # Get the model definition using the generic items API
+             print("📥 Downloading BIM definition...")
+             definition_url = f"https://api.fabric.microsoft.com/v1/workspaces/{workspace_id}/items/{model_id}/getDefinition"
+ 
+             # POST request to get definition with TMSL format (which includes model.bim)
+             # Note: format parameter should be in query string, not body
+             response = requests.post(f"{definition_url}?format=TMSL", headers=headers)
+             response.raise_for_status()
+ 
+             # Handle long-running operation if needed
+             if response.status_code == 202:
+                 operation_id = response.headers.get('x-ms-operation-id')
+                 print(f" Waiting for operation to complete...")
+ 
+                 max_attempts = 30
+                 for attempt in range(max_attempts):
+                     time.sleep(2)
+ 
+                     # Get operation result
+                     result_url = f"https://api.fabric.microsoft.com/v1/operations/{operation_id}/result"
+                     result_response = requests.get(result_url, headers=headers)
+ 
+                     # Check operation status
+                     status_url = f"https://api.fabric.microsoft.com/v1/operations/{operation_id}"
+                     status_response = requests.get(status_url, headers=headers)
+                     status = status_response.json().get('status')
+ 
+                     if status == 'Succeeded':
+                         result_data = result_response.json()
+                         break
+                     elif status == 'Failed':
+                         error = status_response.json().get('error', {})
+                         print(f"❌ Operation failed: {error.get('message')}")
+                         return None
+                     elif attempt == max_attempts - 1:
+                         print("❌ Operation timed out")
+                         return None
+             else:
+                 result_data = response.json()
+ 
+             # Extract BIM content from definition
+             definition = result_data.get('definition', {})
+             parts = definition.get('parts', [])
+ 
+             # Debug: show what parts we have
+             if not parts:
+                 print("❌ No definition parts found in response")
+                 print(f" Result data keys: {list(result_data.keys())}")
+                 print(f" Definition keys: {list(definition.keys()) if definition else 'None'}")
+                 return None
+ 
+             print(f" Found {len(parts)} definition parts:")
+             for part in parts:
+                 print(f" - {part.get('path', 'unknown')}")
+ 
+             bim_part = next((p for p in parts if p.get('path', '').endswith('.bim')), None)
+             if not bim_part:
+                 print("❌ No BIM file found in semantic model definition")
+                 print(f" Looking for files ending with '.bim', found: {[p.get('path') for p in parts]}")
+                 return None
+ 
+             # Decode the BIM content (it's base64 encoded)
+             import base64
+             bim_payload = bim_part.get('payload', '')
+             bim_content = base64.b64decode(bim_payload).decode('utf-8')
+             bim_json = json.loads(bim_content)
+ 
+             # Format as pretty JSON
+             bim_formatted = json.dumps(bim_json, indent=2)
+ 
+             print(f"✓ BIM file downloaded successfully")
+             print(f" - Tables: {len(bim_json.get('model', {}).get('tables', []))}")
+             print(f" - Relationships: {len(bim_json.get('model', {}).get('relationships', []))}")
+ 
+             # Save to file or return content
+             if output_path:
+                 with open(output_path, 'w', encoding='utf-8') as f:
+                     f.write(bim_formatted)
+                 print(f"✓ Saved to: {output_path}")
+                 return output_path
+             else:
+                 return bim_formatted
+ 
+         except Exception as e:
+             print(f"❌ Error downloading semantic model: {e}")
+             import traceback
+             traceback.print_exc()
+             return None
+ 
      def _get_workspace_id_by_name(self, token: str, workspace_name: str) -> Optional[str]:
          """Helper method to get workspace ID from name"""
          try:
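Taken together, the workspace-level `download_bim` and the extended lakehouse-level `deploy` support copying a semantic model across workspaces. A minimal usage sketch, following the connection strings shown in the docstrings above (workspace, lakehouse, and model names are illustrative):

```python
import duckrun

# Workspace-level connection exposes download_bim() (added in this release)
src = duckrun.connect("Source Workspace")
src.download_bim("Sales Model", "sales_model.bim")  # writes the BIM to a local file

# Lakehouse-level connection exposes deploy(); this release accepts a local BIM path
dest = duckrun.connect("Analytics/Sales.lakehouse/dbo")
dest.deploy("sales_model.bim", dataset_name="Sales Model Copy", wait_seconds=10)
```

Alternatively, the new `dest.deploy("Source Workspace/Sales Model")` shorthand from the deploy docstring covers the same copy scenario in a single call.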