duckrun 0.2.9.dev4__tar.gz → 0.2.10__tar.gz
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- {duckrun-0.2.9.dev4 → duckrun-0.2.10}/PKG-INFO +41 -3
- {duckrun-0.2.9.dev4 → duckrun-0.2.10}/README.md +40 -2
- {duckrun-0.2.9.dev4 → duckrun-0.2.10}/duckrun/__init__.py +1 -1
- {duckrun-0.2.9.dev4 → duckrun-0.2.10}/duckrun/core.py +162 -8
- duckrun-0.2.10/duckrun/semantic_model.py +721 -0
- {duckrun-0.2.9.dev4 → duckrun-0.2.10}/duckrun/stats.py +14 -7
- {duckrun-0.2.9.dev4 → duckrun-0.2.10}/duckrun.egg-info/PKG-INFO +41 -3
- {duckrun-0.2.9.dev4 → duckrun-0.2.10}/pyproject.toml +1 -1
- duckrun-0.2.9.dev4/duckrun/semantic_model.py +0 -427
- {duckrun-0.2.9.dev4 → duckrun-0.2.10}/LICENSE +0 -0
- {duckrun-0.2.9.dev4 → duckrun-0.2.10}/duckrun/auth.py +0 -0
- {duckrun-0.2.9.dev4 → duckrun-0.2.10}/duckrun/files.py +0 -0
- {duckrun-0.2.9.dev4 → duckrun-0.2.10}/duckrun/lakehouse.py +0 -0
- {duckrun-0.2.9.dev4 → duckrun-0.2.10}/duckrun/runner.py +0 -0
- {duckrun-0.2.9.dev4 → duckrun-0.2.10}/duckrun/writer.py +0 -0
- {duckrun-0.2.9.dev4 → duckrun-0.2.10}/duckrun.egg-info/SOURCES.txt +0 -0
- {duckrun-0.2.9.dev4 → duckrun-0.2.10}/duckrun.egg-info/dependency_links.txt +0 -0
- {duckrun-0.2.9.dev4 → duckrun-0.2.10}/duckrun.egg-info/requires.txt +0 -0
- {duckrun-0.2.9.dev4 → duckrun-0.2.10}/duckrun.egg-info/top_level.txt +0 -0
- {duckrun-0.2.9.dev4 → duckrun-0.2.10}/setup.cfg +0 -0
{duckrun-0.2.9.dev4 → duckrun-0.2.10}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: duckrun
-Version: 0.2.9.dev4
+Version: 0.2.10
 Summary: Lakehouse task runner powered by DuckDB for Microsoft Fabric
 Author: mim
 License: MIT
@@ -420,6 +420,37 @@ success = con.run(pipeline) # Returns True only if ALL tasks succeed
 
 This prevents downstream tasks from processing incomplete or corrupted data.
 
+### Semantic Model Deployment
+
+Deploy Power BI semantic models directly from BIM files using DirectLake mode:
+
+```python
+# Connect to lakehouse
+con = duckrun.connect("Analytics/Sales.lakehouse/dbo")
+
+# Deploy with auto-generated name (lakehouse_schema)
+con.deploy("https://raw.githubusercontent.com/user/repo/main/model.bim")
+
+# Deploy with custom name
+con.deploy(
+    "https://raw.githubusercontent.com/user/repo/main/sales_model.bim",
+    dataset_name="Sales Analytics Model",
+    wait_seconds=10  # Wait for permission propagation
+)
+```
+
+**Features:**
+- 🚀 **DirectLake Mode**: Deploys semantic models with DirectLake connection
+- 🔄 **Automatic Configuration**: Auto-configures workspace, lakehouse, and schema connections
+- 📦 **BIM from URL**: Load model definitions from GitHub or any accessible URL
+- ⏱️ **Permission Handling**: Configurable wait time for permission propagation
+
+**Use Cases:**
+- Deploy semantic models as part of CI/CD pipelines
+- Version control your semantic models in Git
+- Automated model deployment across environments
+- Streamline DirectLake model creation
+
 ### Delta Lake Optimization
 
 Duckrun automatically:
@@ -534,6 +565,12 @@ con.sql("""
 
 # 5. Download processed files for external systems
 con.download("processed_reports", "./exports", ['.csv'])
+
+# 6. Deploy semantic model for Power BI
+con.deploy(
+    "https://raw.githubusercontent.com/user/repo/main/sales_model.bim",
+    dataset_name="Sales Analytics"
+)
 ```
 
 **This example demonstrates:**
@@ -541,8 +578,9 @@ con.download("processed_reports", "./exports", ['.csv'])
 - 🔄 **Pipeline orchestration** with SQL and Python tasks
 - ⚡ **Fast data exploration** with DuckDB
 - 💾 **Delta table creation** with Spark-style API
--
--
+- 🔀 **Schema evolution** and partitioning
+- 📤 **File downloads** from OneLake Files
+- 📊 **Semantic model deployment** with DirectLake
 
 ## Schema Evolution & Partitioning Guide
 
{duckrun-0.2.9.dev4 → duckrun-0.2.10}/README.md
@@ -400,6 +400,37 @@ success = con.run(pipeline) # Returns True only if ALL tasks succeed
 
 This prevents downstream tasks from processing incomplete or corrupted data.
 
+### Semantic Model Deployment
+
+Deploy Power BI semantic models directly from BIM files using DirectLake mode:
+
+```python
+# Connect to lakehouse
+con = duckrun.connect("Analytics/Sales.lakehouse/dbo")
+
+# Deploy with auto-generated name (lakehouse_schema)
+con.deploy("https://raw.githubusercontent.com/user/repo/main/model.bim")
+
+# Deploy with custom name
+con.deploy(
+    "https://raw.githubusercontent.com/user/repo/main/sales_model.bim",
+    dataset_name="Sales Analytics Model",
+    wait_seconds=10  # Wait for permission propagation
+)
+```
+
+**Features:**
+- 🚀 **DirectLake Mode**: Deploys semantic models with DirectLake connection
+- 🔄 **Automatic Configuration**: Auto-configures workspace, lakehouse, and schema connections
+- 📦 **BIM from URL**: Load model definitions from GitHub or any accessible URL
+- ⏱️ **Permission Handling**: Configurable wait time for permission propagation
+
+**Use Cases:**
+- Deploy semantic models as part of CI/CD pipelines
+- Version control your semantic models in Git
+- Automated model deployment across environments
+- Streamline DirectLake model creation
+
 ### Delta Lake Optimization
 
 Duckrun automatically:
@@ -514,6 +545,12 @@ con.sql("""
 
 # 5. Download processed files for external systems
 con.download("processed_reports", "./exports", ['.csv'])
+
+# 6. Deploy semantic model for Power BI
+con.deploy(
+    "https://raw.githubusercontent.com/user/repo/main/sales_model.bim",
+    dataset_name="Sales Analytics"
+)
 ```
 
 **This example demonstrates:**
@@ -521,8 +558,9 @@ con.download("processed_reports", "./exports", ['.csv'])
 - 🔄 **Pipeline orchestration** with SQL and Python tasks
 - ⚡ **Fast data exploration** with DuckDB
 - 💾 **Delta table creation** with Spark-style API
--
--
+- 🔀 **Schema evolution** and partitioning
+- 📤 **File downloads** from OneLake Files
+- 📊 **Semantic model deployment** with DirectLake
 
 ## Schema Evolution & Partitioning Guide
 
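The "Use Cases" list above mentions CI/CD deployment. Below is a minimal sketch of that scenario, wiring the documented `connect` and `deploy` calls into an exit code a pipeline runner can check. The workspace, lakehouse, URL, and the assumption that `deploy` returns a truthy value on success (its Returns section is truncated in this diff) are all illustrative:

```python
# deploy_model.py: hypothetical CI step; all names and the URL are placeholders.
import sys

import duckrun


def main() -> int:
    # Connect to the target schema: "workspace/lakehouse.lakehouse/schema"
    con = duckrun.connect("Analytics/Sales.lakehouse/dbo")

    # Deploy the version-controlled BIM file via DirectLake.
    # Assumption: the return value signals success (not shown in this diff).
    ok = con.deploy(
        "https://raw.githubusercontent.com/user/repo/main/sales_model.bim",
        dataset_name="Sales Analytics",
        wait_seconds=10,  # allow permissions to propagate on newly created items
    )
    return 0 if ok else 1


if __name__ == "__main__":
    sys.exit(main())
```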
{duckrun-0.2.9.dev4 → duckrun-0.2.10}/duckrun/core.py
@@ -2,6 +2,8 @@ import duckdb
 import requests
 import os
 import importlib.util
+import json
+import time
 from deltalake import DeltaTable, write_deltalake
 from typing import List, Tuple, Union, Optional, Callable, Dict, Any
 from string import Template
@@ -702,8 +704,11 @@ class Duckrun:
         Deploy a semantic model from a BIM file using DirectLake mode.
 
         Args:
-            bim_url:
-
+            bim_url: Can be:
+                - URL: "https://raw.githubusercontent.com/.../model.bim"
+                - Local file: "model.bim"
+                - Workspace/Model: "workspace_name/model_name"
+            dataset_name: Name for the semantic model (default: source model name if workspace/model format, else lakehouse_schema)
             wait_seconds: Seconds to wait for permission propagation (default: 5)
 
         Returns:
@@ -712,18 +717,28 @@ class Duckrun:
         Examples:
             dr = Duckrun.connect("My Workspace/My Lakehouse.lakehouse/dbo")
 
-            # Deploy
-            dr.deploy("
+            # Deploy from workspace/model (uses same name by default)
+            dr.deploy("Source Workspace/Source Model")  # Creates "Source Model"
 
             # Deploy with custom name
-            dr.deploy("
-
+            dr.deploy("Source Workspace/Source Model", dataset_name="Sales Model Copy")
+
+            # Deploy from URL or local file
+            dr.deploy("https://raw.githubusercontent.com/.../model.bim", dataset_name="My Model")
         """
         from .semantic_model import deploy_semantic_model
 
         # Auto-generate dataset name if not provided
         if dataset_name is None:
-
+            # If using workspace/model format, use the model name
+            if "/" in bim_url and not bim_url.startswith(('http://', 'https://')):
+                parts = bim_url.split("/")
+                if len(parts) == 2:
+                    dataset_name = parts[1]  # Use the model name
+                else:
+                    dataset_name = f"{self.lakehouse_name}_{self.schema}"
+            else:
+                dataset_name = f"{self.lakehouse_name}_{self.schema}"
 
         # Call the deployment function (DirectLake only)
         return deploy_semantic_model(
@@ -731,7 +746,7 @@ class Duckrun:
             lakehouse_name_or_id=self.lakehouse_name,
             schema_name=self.schema,
             dataset_name=dataset_name,
-
+            bim_url_or_path=bim_url,
             wait_seconds=wait_seconds
         )
 
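For reference, this is how the auto-naming branch above resolves `dataset_name` for each accepted `bim_url` form; a sketch traced from the diffed code, with illustrative workspace, lakehouse, and model names:

```python
import duckrun

# Assume lakehouse "Sales" and schema "dbo" (illustrative names).
dr = duckrun.connect("My Workspace/Sales.lakehouse/dbo")

dr.deploy("Source Workspace/Source Model")  # "/" and not a URL -> dataset_name = "Source Model"
dr.deploy("model.bim")                      # no "/" -> falls back to "Sales_dbo"
dr.deploy("https://example.com/model.bim")  # URL -> falls back to "Sales_dbo"
```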
@@ -864,6 +879,145 @@ class WorkspaceConnection:
             print(f"❌ Error creating lakehouse '{lakehouse_name}': {e}")
             return False
 
+    def download_bim(self, semantic_model_name: str, output_path: Optional[str] = None) -> Optional[str]:
+        """
+        Download a semantic model as a BIM (Business Intelligence Model) file.
+
+        Args:
+            semantic_model_name: Name of the semantic model to download
+            output_path: Optional path to save the BIM file. If not provided, returns the BIM content as JSON string
+
+        Returns:
+            BIM content as JSON string if output_path is None, or the file path if saved successfully
+
+        Example:
+            con = duckrun.connect("My Workspace")
+            # Get BIM content as string
+            bim_content = con.download_bim("Sales Model")
+            # Or save to file
+            con.download_bim("Sales Model", "sales_model.bim")
+        """
+        try:
+            # Get authentication token
+            from .auth import get_fabric_api_token
+            token = get_fabric_api_token()
+            if not token:
+                print("❌ Failed to authenticate for downloading semantic model")
+                return None
+
+            # Resolve workspace name to ID
+            workspace_id = self._get_workspace_id_by_name(token, self.workspace_name)
+            if not workspace_id:
+                print(f"❌ Workspace '{self.workspace_name}' not found")
+                return None
+
+            # Get semantic model ID
+            print(f"🔍 Looking for semantic model '{semantic_model_name}'...")
+            url = f"https://api.fabric.microsoft.com/v1/workspaces/{workspace_id}/semanticModels"
+            headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
+
+            response = requests.get(url, headers=headers)
+            response.raise_for_status()
+
+            models = response.json().get("value", [])
+            model = next((m for m in models if m.get("displayName") == semantic_model_name), None)
+
+            if not model:
+                print(f"❌ Semantic model '{semantic_model_name}' not found in workspace '{self.workspace_name}'")
+                return None
+
+            model_id = model.get("id")
+            print(f"✓ Found semantic model: {semantic_model_name} (ID: {model_id})")
+
+            # Get the model definition using the generic items API
+            print("📥 Downloading BIM definition...")
+            definition_url = f"https://api.fabric.microsoft.com/v1/workspaces/{workspace_id}/items/{model_id}/getDefinition"
+
+            # POST request to get definition with TMSL format (which includes model.bim)
+            # Note: format parameter should be in query string, not body
+            response = requests.post(f"{definition_url}?format=TMSL", headers=headers)
+            response.raise_for_status()
+
+            # Handle long-running operation if needed
+            if response.status_code == 202:
+                operation_id = response.headers.get('x-ms-operation-id')
+                print(f"   Waiting for operation to complete...")
+
+                max_attempts = 30
+                for attempt in range(max_attempts):
+                    time.sleep(2)
+
+                    # Get operation result
+                    result_url = f"https://api.fabric.microsoft.com/v1/operations/{operation_id}/result"
+                    result_response = requests.get(result_url, headers=headers)
+
+                    # Check operation status
+                    status_url = f"https://api.fabric.microsoft.com/v1/operations/{operation_id}"
+                    status_response = requests.get(status_url, headers=headers)
+                    status = status_response.json().get('status')
+
+                    if status == 'Succeeded':
+                        result_data = result_response.json()
+                        break
+                    elif status == 'Failed':
+                        error = status_response.json().get('error', {})
+                        print(f"❌ Operation failed: {error.get('message')}")
+                        return None
+                    elif attempt == max_attempts - 1:
+                        print("❌ Operation timed out")
+                        return None
+            else:
+                result_data = response.json()
+
+            # Extract BIM content from definition
+            definition = result_data.get('definition', {})
+            parts = definition.get('parts', [])
+
+            # Debug: show what parts we have
+            if not parts:
+                print("❌ No definition parts found in response")
+                print(f"   Result data keys: {list(result_data.keys())}")
+                print(f"   Definition keys: {list(definition.keys()) if definition else 'None'}")
+                return None
+
+            print(f"   Found {len(parts)} definition parts:")
+            for part in parts:
+                print(f"   - {part.get('path', 'unknown')}")
+
+            bim_part = next((p for p in parts if p.get('path', '').endswith('.bim')), None)
+            if not bim_part:
+                print("❌ No BIM file found in semantic model definition")
+                print(f"   Looking for files ending with '.bim', found: {[p.get('path') for p in parts]}")
+                return None
+
+            # Decode the BIM content (it's base64 encoded)
+            import base64
+            bim_payload = bim_part.get('payload', '')
+            bim_content = base64.b64decode(bim_payload).decode('utf-8')
+            bim_json = json.loads(bim_content)
+
+            # Format as pretty JSON
+            bim_formatted = json.dumps(bim_json, indent=2)
+
+            print(f"✓ BIM file downloaded successfully")
+            print(f"   - Tables: {len(bim_json.get('model', {}).get('tables', []))}")
+            print(f"   - Relationships: {len(bim_json.get('model', {}).get('relationships', []))}")
+
+            # Save to file or return content
+            if output_path:
+                with open(output_path, 'w', encoding='utf-8') as f:
+                    f.write(bim_formatted)
+                print(f"✓ Saved to: {output_path}")
+                return output_path
+            else:
+                return bim_formatted
+
+        except Exception as e:
+            print(f"❌ Error downloading semantic model: {e}")
+            import traceback
+            traceback.print_exc()
+            return None
+
     def _get_workspace_id_by_name(self, token: str, workspace_name: str) -> Optional[str]:
         """Helper method to get workspace ID from name"""
         try: