duckrun 0.2.7__tar.gz → 0.2.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {duckrun-0.2.7 → duckrun-0.2.9}/PKG-INFO +41 -3
- {duckrun-0.2.7 → duckrun-0.2.9}/README.md +40 -2
- {duckrun-0.2.7 → duckrun-0.2.9}/duckrun/__init__.py +1 -1
- duckrun-0.2.9/duckrun/auth.py +249 -0
- {duckrun-0.2.7 → duckrun-0.2.9}/duckrun/core.py +122 -83
- {duckrun-0.2.7 → duckrun-0.2.9}/duckrun/files.py +14 -14
- {duckrun-0.2.7 → duckrun-0.2.9}/duckrun/runner.py +12 -5
- duckrun-0.2.9/duckrun/semantic_model.py +434 -0
- {duckrun-0.2.7 → duckrun-0.2.9}/duckrun/stats.py +14 -1
- {duckrun-0.2.7 → duckrun-0.2.9}/duckrun/writer.py +49 -12
- {duckrun-0.2.7 → duckrun-0.2.9}/duckrun.egg-info/PKG-INFO +41 -3
- {duckrun-0.2.7 → duckrun-0.2.9}/duckrun.egg-info/SOURCES.txt +2 -0
- {duckrun-0.2.7 → duckrun-0.2.9}/pyproject.toml +1 -1
- {duckrun-0.2.7 → duckrun-0.2.9}/LICENSE +0 -0
- {duckrun-0.2.7 → duckrun-0.2.9}/duckrun/lakehouse.py +0 -0
- {duckrun-0.2.7 → duckrun-0.2.9}/duckrun.egg-info/dependency_links.txt +0 -0
- {duckrun-0.2.7 → duckrun-0.2.9}/duckrun.egg-info/requires.txt +0 -0
- {duckrun-0.2.7 → duckrun-0.2.9}/duckrun.egg-info/top_level.txt +0 -0
- {duckrun-0.2.7 → duckrun-0.2.9}/setup.cfg +0 -0
{duckrun-0.2.7 → duckrun-0.2.9}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: duckrun
-Version: 0.2.7
+Version: 0.2.9
 Summary: Lakehouse task runner powered by DuckDB for Microsoft Fabric
 Author: mim
 License: MIT
@@ -420,6 +420,37 @@ success = con.run(pipeline)  # Returns True only if ALL tasks succeed
 
 This prevents downstream tasks from processing incomplete or corrupted data.
 
+### Semantic Model Deployment
+
+Deploy Power BI semantic models directly from BIM files using DirectLake mode:
+
+```python
+# Connect to lakehouse
+con = duckrun.connect("Analytics/Sales.lakehouse/dbo")
+
+# Deploy with auto-generated name (lakehouse_schema)
+con.deploy("https://raw.githubusercontent.com/user/repo/main/model.bim")
+
+# Deploy with custom name
+con.deploy(
+    "https://raw.githubusercontent.com/user/repo/main/sales_model.bim",
+    dataset_name="Sales Analytics Model",
+    wait_seconds=10  # Wait for permission propagation
+)
+```
+
+**Features:**
+- 🚀 **DirectLake Mode**: Deploys semantic models with DirectLake connection
+- 🔄 **Automatic Configuration**: Auto-configures workspace, lakehouse, and schema connections
+- 📦 **BIM from URL**: Load model definitions from GitHub or any accessible URL
+- ⏱️ **Permission Handling**: Configurable wait time for permission propagation
+
+**Use Cases:**
+- Deploy semantic models as part of CI/CD pipelines
+- Version control your semantic models in Git
+- Automated model deployment across environments
+- Streamline DirectLake model creation
+
 ### Delta Lake Optimization
 
 Duckrun automatically:
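Note: the CI/CD use case above works because `deploy()` returns an integer status (1 for success, 0 for failure; see the `deploy` hunk in `core.py` below), so a job can gate on the result. A minimal sketch — the script name and workspace path are hypothetical:

```python
# deploy_model.py -- hypothetical CI entry point built on the documented API
import sys
import duckrun

con = duckrun.connect("Analytics/Sales.lakehouse/dbo")

# deploy() returns 1 for success, 0 for failure (per the core.py docstring)
status = con.deploy(
    "https://raw.githubusercontent.com/user/repo/main/sales_model.bim",
    dataset_name="Sales Analytics Model",
)
sys.exit(0 if status == 1 else 1)
```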
@@ -534,6 +565,12 @@ con.sql("""
 
 # 5. Download processed files for external systems
 con.download("processed_reports", "./exports", ['.csv'])
+
+# 6. Deploy semantic model for Power BI
+con.deploy(
+    "https://raw.githubusercontent.com/user/repo/main/sales_model.bim",
+    dataset_name="Sales Analytics"
+)
 ```
 
 **This example demonstrates:**
@@ -541,8 +578,9 @@ con.download("processed_reports", "./exports", ['.csv'])
 - 🔄 **Pipeline orchestration** with SQL and Python tasks
 - ⚡ **Fast data exploration** with DuckDB
 - 💾 **Delta table creation** with Spark-style API
--
--
+- 🔀 **Schema evolution** and partitioning
+- 📤 **File downloads** from OneLake Files
+- 📊 **Semantic model deployment** with DirectLake
 
 ## Schema Evolution & Partitioning Guide
 
{duckrun-0.2.7 → duckrun-0.2.9}/README.md

@@ -400,6 +400,37 @@ success = con.run(pipeline)  # Returns True only if ALL tasks succeed
 
 This prevents downstream tasks from processing incomplete or corrupted data.
 
+### Semantic Model Deployment
+
+Deploy Power BI semantic models directly from BIM files using DirectLake mode:
+
+```python
+# Connect to lakehouse
+con = duckrun.connect("Analytics/Sales.lakehouse/dbo")
+
+# Deploy with auto-generated name (lakehouse_schema)
+con.deploy("https://raw.githubusercontent.com/user/repo/main/model.bim")
+
+# Deploy with custom name
+con.deploy(
+    "https://raw.githubusercontent.com/user/repo/main/sales_model.bim",
+    dataset_name="Sales Analytics Model",
+    wait_seconds=10  # Wait for permission propagation
+)
+```
+
+**Features:**
+- 🚀 **DirectLake Mode**: Deploys semantic models with DirectLake connection
+- 🔄 **Automatic Configuration**: Auto-configures workspace, lakehouse, and schema connections
+- 📦 **BIM from URL**: Load model definitions from GitHub or any accessible URL
+- ⏱️ **Permission Handling**: Configurable wait time for permission propagation
+
+**Use Cases:**
+- Deploy semantic models as part of CI/CD pipelines
+- Version control your semantic models in Git
+- Automated model deployment across environments
+- Streamline DirectLake model creation
+
 ### Delta Lake Optimization
 
 Duckrun automatically:
@@ -514,6 +545,12 @@ con.sql("""
 
 # 5. Download processed files for external systems
 con.download("processed_reports", "./exports", ['.csv'])
+
+# 6. Deploy semantic model for Power BI
+con.deploy(
+    "https://raw.githubusercontent.com/user/repo/main/sales_model.bim",
+    dataset_name="Sales Analytics"
+)
 ```
 
 **This example demonstrates:**
@@ -521,8 +558,9 @@ con.download("processed_reports", "./exports", ['.csv'])
 - 🔄 **Pipeline orchestration** with SQL and Python tasks
 - ⚡ **Fast data exploration** with DuckDB
 - 💾 **Delta table creation** with Spark-style API
--
--
+- 🔀 **Schema evolution** and partitioning
+- 📤 **File downloads** from OneLake Files
+- 📊 **Semantic model deployment** with DirectLake
 
 ## Schema Evolution & Partitioning Guide
 
duckrun-0.2.9/duckrun/auth.py (new file)

@@ -0,0 +1,249 @@
+"""
+Enhanced authentication module for duckrun - supports multiple notebook environments
+"""
+import os
+from typing import Optional, Tuple
+
+
+def get_token() -> Optional[str]:
+    """
+    Smart authentication that works across multiple environments:
+    - Microsoft Fabric notebooks (uses notebookutils)
+    - Local environments with Azure CLI (uses CLI + browser fallback)
+    - Google Colab (uses device code flow)
+    - Other headless environments (uses device code flow)
+    - Existing token from environment (uses cached token)
+
+    Returns:
+        Azure Storage token string or None if authentication fails
+    """
+    # Check if we already have a cached token
+    token_env = os.environ.get("AZURE_STORAGE_TOKEN")
+    if token_env and token_env != "PLACEHOLDER_TOKEN_TOKEN_NOT_AVAILABLE":
+        print("✅ Using existing Azure Storage token")
+        return token_env
+
+    print("🔐 Starting Azure authentication...")
+
+    # Try Fabric notebook environment first
+    try:
+        import notebookutils  # type: ignore
+        print("📓 Microsoft Fabric notebook detected - using notebookutils")
+        token = notebookutils.credentials.getToken("pbi")
+        os.environ["AZURE_STORAGE_TOKEN"] = token
+        print("✅ Fabric notebook authentication successful!")
+        return token
+    except ImportError:
+        pass  # Not in Fabric notebook
+    except Exception as e:
+        print(f"⚠️ Fabric notebook authentication failed: {e}")
+
+    # Detect environment type for fallback authentication
+    try:
+        # Check if we're in Google Colab first
+        try:
+            import google.colab
+            print("🚀 Google Colab detected - using device code flow")
+            return _get_device_code_token()
+        except ImportError:
+            pass
+
+        # For all other environments (including VS Code), try Azure CLI first
+        # This includes local development, VS Code notebooks, etc.
+        print("🖥️ Local/VS Code environment detected - trying Azure CLI first, then browser fallback")
+        return _get_local_token()
+
+    except Exception as e:
+        print(f"❌ Authentication failed: {e}")
+        print("💡 Try refreshing and running again, or check your Azure permissions")
+        return None
+
+
+def _get_device_code_token() -> Optional[str]:
+    """Get token using device code flow for headless environments"""
+    try:
+        from azure.identity import DeviceCodeCredential
+
+        # Use Azure CLI client ID for device code flow
+        credential = DeviceCodeCredential(
+            client_id="04b07795-8ddb-461a-bbee-02f9e1bf7b46",  # Azure CLI client ID
+            tenant_id="common"
+        )
+
+        print("🔐 Follow the authentication prompts in your browser...")
+        token_obj = credential.get_token("https://storage.azure.com/.default")
+
+        os.environ["AZURE_STORAGE_TOKEN"] = token_obj.token
+        print("✅ Device code authentication successful!")
+        return token_obj.token
+
+    except Exception as e:
+        print(f"❌ Device code authentication failed: {e}")
+        return None
+
+
+def _get_local_token() -> Optional[str]:
+    """Get token using CLI first, then browser fallback for local environments"""
+    # First try Azure CLI directly
+    try:
+        from azure.identity import AzureCliCredential
+        print("🔐 Trying Azure CLI authentication...")
+
+        cli_credential = AzureCliCredential()
+        token_obj = cli_credential.get_token("https://storage.azure.com/.default")
+
+        os.environ["AZURE_STORAGE_TOKEN"] = token_obj.token
+        print("✅ Azure CLI authentication successful!")
+        return token_obj.token
+
+    except Exception as cli_error:
+        print(f"⚠️ Azure CLI authentication failed: {cli_error}")
+        print("🔐 Falling back to interactive browser authentication...")
+
+    # Fallback to interactive browser
+    try:
+        from azure.identity import InteractiveBrowserCredential
+
+        browser_credential = InteractiveBrowserCredential()
+        token_obj = browser_credential.get_token("https://storage.azure.com/.default")
+
+        os.environ["AZURE_STORAGE_TOKEN"] = token_obj.token
+        print("✅ Interactive browser authentication successful!")
+        return token_obj.token
+
+    except Exception as browser_error:
+        print(f"❌ Interactive browser authentication failed: {browser_error}")
+        return None
+
+
+def get_fabric_api_token() -> Optional[str]:
+    """
+    Get token for Fabric API operations (different scope than storage)
+
+    Returns:
+        Fabric API token string or None if authentication fails
+    """
+    # Check if we already have a cached Fabric API token
+    fabric_token_env = os.environ.get("FABRIC_API_TOKEN")
+    if fabric_token_env:
+        print("✅ Using cached Fabric API token")
+        return fabric_token_env
+
+    print("🔐 Getting Fabric API token...")
+
+    # Try Fabric notebook environment first
+    try:
+        import notebookutils  # type: ignore
+        print("📓 Microsoft Fabric notebook detected - using notebookutils")
+        token = notebookutils.credentials.getToken("pbi")
+        os.environ["FABRIC_API_TOKEN"] = token
+        print("✅ Fabric API token obtained!")
+        return token
+    except ImportError:
+        pass  # Not in Fabric notebook
+    except Exception as e:
+        print(f"⚠️ Fabric notebook token failed: {e}")
+
+    # Fallback to azure-identity for external environments
+    try:
+        # Check if we're in Google Colab
+        try:
+            import google.colab
+            print("💻 Using device code flow for Fabric API (Colab)")
+            from azure.identity import DeviceCodeCredential
+            credential = DeviceCodeCredential(
+                client_id="04b07795-8ddb-461a-bbee-02f9e1bf7b46",
+                tenant_id="common"
+            )
+        except ImportError:
+            # For all other environments, try CLI first then browser
+            print("🖥️ Using CLI + browser fallback for Fabric API")
+
+            # Try CLI first
+            try:
+                from azure.identity import AzureCliCredential
+                print("🔐 Trying Azure CLI for Fabric API...")
+                credential = AzureCliCredential()
+                token_obj = credential.get_token("https://api.fabric.microsoft.com/.default")
+                os.environ["FABRIC_API_TOKEN"] = token_obj.token
+                print("✅ Fabric API token obtained via Azure CLI!")
+                return token_obj.token
+            except Exception as cli_error:
+                print(f"⚠️ Azure CLI failed for Fabric API: {cli_error}")
+                print("🔐 Falling back to interactive browser for Fabric API...")
+                from azure.identity import InteractiveBrowserCredential
+                credential = InteractiveBrowserCredential()
+
+        token_obj = credential.get_token("https://api.fabric.microsoft.com/.default")
+        os.environ["FABRIC_API_TOKEN"] = token_obj.token
+        print("✅ Fabric API token obtained!")
+        return token_obj.token
+
+    except Exception as e:
+        print(f"❌ Fabric API authentication failed: {e}")
+        return None
+
+
+def authenticate_for_environment() -> Tuple[bool, Optional[str]]:
+    """
+    Main authentication entry point - detects environment and authenticates appropriately
+
+    Returns:
+        Tuple of (success: bool, token: Optional[str])
+    """
+    print("\n🔍 Detecting execution environment...")
+
+    # Check environment
+    try:
+        import notebookutils  # type: ignore
+        env_type = "Microsoft Fabric Notebook"
+    except ImportError:
+        try:
+            import google.colab
+            env_type = "Google Colab"
+        except ImportError:
+            # For all other environments (VS Code, local Python, etc.)
+            # we'll treat as local and try Azure CLI first
+            env_type = "Local/VS Code Environment"
+
+    print(f"📍 Environment: {env_type}")
+
+    token = get_token()
+    if token:
+        print(f"✅ Authentication successful for {env_type}")
+        return True, token
+    else:
+        print(f"❌ Authentication failed for {env_type}")
+        return False, None
+
+
+# For backward compatibility - expose the same interface as before
+def get_storage_token() -> str:
+    """
+    Backward compatible method - returns token or placeholder
+    """
+    token = get_token()
+    return token if token else "PLACEHOLDER_TOKEN_TOKEN_NOT_AVAILABLE"
+
+
+# Example usage function for testing
+def test_authentication():
+    """
+    Test authentication in current environment
+    """
+    print("=" * 60)
+    print("🧪 TESTING DUCKRUN AUTHENTICATION")
+    print("=" * 60)
+
+    success, token = authenticate_for_environment()
+
+    if success:
+        print("\n✅ Authentication test successful!")
+        print(f"Token length: {len(token) if token else 0} characters")
+        print(f"Token starts with: {token[:20] if token else 'None'}...")
+    else:
+        print("\n❌ Authentication test failed!")
+        print("Please check your Azure setup and permissions.")
+
+    print("=" * 60)
+    return success
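The new module caches credentials in environment variables (`AZURE_STORAGE_TOKEN`, `FABRIC_API_TOKEN`), so repeated calls reuse the same token. A small sketch of driving it directly, using only the functions defined above:

```python
# Sketch: exercising duckrun.auth outside of Duckrun itself
from duckrun.auth import get_token, get_fabric_api_token, test_authentication

storage_token = get_token()            # storage scope, cached in AZURE_STORAGE_TOKEN
fabric_token = get_fabric_api_token()  # Fabric REST scope, cached in FABRIC_API_TOKEN

if not (storage_token and fabric_token):
    test_authentication()  # prints a diagnostic banner and returns a bool
```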
{duckrun-0.2.7 → duckrun-0.2.9}/duckrun/core.py

@@ -82,6 +82,15 @@ class Duckrun:
 
         self.con = duckdb.connect()
         self.con.sql("SET preserve_insertion_order = false")
+
+        # Configure Azure transport for Colab (fixes SSL cert issues)
+        try:
+            import google.colab  # type: ignore
+            self.con.sql("SET azure_transport_option_type = 'curl'")
+            print("🔧 Colab detected - using curl transport for Azure")
+        except ImportError:
+            pass  # Not in Colab, use default transport
+
         self._attach_lakehouse()
 
     @classmethod
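`azure_transport_option_type` is a setting of DuckDB's `azure` extension; the constructor change applies it only when `google.colab` is importable. The standalone equivalent, as a sketch (assuming the extension is installed):

```python
import duckdb

con = duckdb.connect()
con.sql("INSTALL azure; LOAD azure;")                # the setting lives in the azure extension
con.sql("SET azure_transport_option_type = 'curl'")  # same workaround duckrun applies on Colab
```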
@@ -196,18 +205,19 @@ class Duckrun:
         print(f"🔍 Resolving '{workspace_name}' workspace and '{lakehouse_name}' lakehouse to GUIDs (workspace has spaces)...")
 
         try:
-            # Get authentication token
+            # Get authentication token using enhanced auth system
+            from .auth import get_fabric_api_token
+            token = get_fabric_api_token()
+            if not token:
+                raise ValueError("Failed to obtain Fabric API token")
+
+            # Try to get current workspace ID if in notebook environment
+            current_workspace_id = None
             try:
                 import notebookutils  # type: ignore
-                token = notebookutils.credentials.getToken("pbi")
                 current_workspace_id = notebookutils.runtime.context.get("workspaceId")
             except ImportError:
-
-                # Fallback to azure-identity for external environments
-                from azure.identity import AzureCliCredential, InteractiveBrowserCredential, ChainedTokenCredential
-                credential = ChainedTokenCredential(AzureCliCredential(), InteractiveBrowserCredential())
-                token_obj = credential.get_token("https://api.fabric.microsoft.com/.default")
-                token = token_obj.token
+                pass  # Not in notebook environment
 
             # Resolve workspace name to ID
             if current_workspace_id:
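`_get_workspace_id_by_name` (unchanged, defined later in the file) is what backs this name-to-GUID fallback. For reference, a minimal sketch of such a lookup against the Fabric REST `GET /v1/workspaces` endpoint — the helper name here is illustrative and pagination is omitted:

```python
import requests

def workspace_id_by_name(token: str, workspace_name: str):
    # List workspaces visible to the caller and match on display name.
    # Sketch only: continuation tokens (pagination) are not handled.
    resp = requests.get(
        "https://api.fabric.microsoft.com/v1/workspaces",
        headers={"Authorization": f"Bearer {token}"},
    )
    resp.raise_for_status()
    for ws in resp.json().get("value", []):
        if ws.get("displayName") == workspace_name:
            return ws.get("id")
    return None
```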
@@ -302,19 +312,23 @@ class Duckrun:
         return WorkspaceConnection(workspace_name)
 
     def _get_storage_token(self):
-
+        from .auth import get_storage_token
+        return get_storage_token()
 
     def _create_onelake_secret(self):
         token = self._get_storage_token()
         if token != "PLACEHOLDER_TOKEN_TOKEN_NOT_AVAILABLE":
             self.con.sql(f"CREATE OR REPLACE SECRET onelake (TYPE AZURE, PROVIDER ACCESS_TOKEN, ACCESS_TOKEN '{token}')")
         else:
-
-            from
-
-            token
-
-
+            # Enhanced authentication - try all methods
+            from .auth import get_token
+            token = get_token()
+            if token:
+                os.environ["AZURE_STORAGE_TOKEN"] = token
+                self.con.sql(f"CREATE OR REPLACE SECRET onelake (TYPE AZURE, PROVIDER ACCESS_TOKEN, ACCESS_TOKEN '{token}')")
+            else:
+                # Final fallback to persistent secret
+                self.con.sql("CREATE OR REPLACE PERSISTENT SECRET onelake (TYPE azure, PROVIDER credential_chain, CHAIN 'cli', ACCOUNT_NAME 'onelake')")
 
     def _discover_tables_fast(self) -> List[Tuple[str, str]]:
         """
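When no token can be acquired at all, the new fallback hands authentication to DuckDB's own credential chain. As a standalone sketch (requires a prior `az login`; the secret SQL is taken verbatim from the hunk above):

```python
import duckdb

con = duckdb.connect()
con.sql("INSTALL azure; LOAD azure;")
# DuckDB resolves credentials via the Azure CLI ('cli' link of the chain),
# scoped to the 'onelake' storage account:
con.sql(
    "CREATE OR REPLACE PERSISTENT SECRET onelake "
    "(TYPE azure, PROVIDER credential_chain, CHAIN 'cli', ACCOUNT_NAME 'onelake')"
)
```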
@@ -326,12 +340,12 @@ class Duckrun:
         """
         token = self._get_storage_token()
         if token == "PLACEHOLDER_TOKEN_TOKEN_NOT_AVAILABLE":
-            print("Authenticating with Azure for table discovery (
-            from
-
-
-
-
+            print("Authenticating with Azure for table discovery (detecting environment automatically)...")
+            from .auth import get_token
+            token = get_token()
+            if not token:
+                print("❌ Failed to authenticate for table discovery")
+                return []
 
         url = f"abfss://{self.workspace}@{self.storage_account}.dfs.fabric.microsoft.com/"
         store = AzureStore.from_url(url, bearer_token=token)
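`AzureStore.from_url(url, bearer_token=token)` is taken verbatim from the hunk; the body of `_discover_tables_fast` itself is unchanged and not shown here. A rough sketch of the discovery pattern, assuming the `obstore` bindings and the `Tables/<schema>/<table>/_delta_log/` OneLake layout — illustrative only, not duckrun's exact implementation:

```python
import obstore
from obstore.store import AzureStore

url = "abfss://MyWorkspace@onelake.dfs.fabric.microsoft.com/"
store = AzureStore.from_url(url, bearer_token="<token from get_token()>")

tables = set()
for batch in obstore.list(store, prefix="MyLakehouse.Lakehouse/Tables/"):
    for meta in batch:
        parts = meta["path"].split("/")
        if "_delta_log" in parts:
            i = parts.index("_delta_log")
            tables.add((parts[i - 2], parts[i - 1]))  # (schema, table)
print(sorted(tables))
```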
@@ -579,19 +593,22 @@ class Duckrun:
             List of lakehouse names
         """
         try:
-            #
+            # Get authentication token using enhanced auth system
+            from .auth import get_fabric_api_token
+            token = get_fabric_api_token()
+            if not token:
+                print("❌ Failed to authenticate for listing lakehouses")
+                return []
+
+            # Try to get current workspace ID if in notebook environment
+            workspace_id = None
             try:
                 import notebookutils  # type: ignore
-                token = notebookutils.credentials.getToken("pbi")
                 workspace_id = notebookutils.runtime.context.get("workspaceId")
             except ImportError:
-                #
-
-
-                credential = ChainedTokenCredential(AzureCliCredential(), InteractiveBrowserCredential())
-                token_obj = credential.get_token("https://api.fabric.microsoft.com/.default")
-                token = token_obj.token
-
+                pass  # Not in notebook environment
+
+            if not workspace_id:
                 # Get workspace ID by name
                 workspace_id = self._get_workspace_id_by_name(token, self.workspace)
                 if not workspace_id:
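The lakehouse listing endpoint used further down in this method also appears in the hunks below. A compact sketch of the same call in isolation — the function name is illustrative:

```python
import requests

def list_lakehouses(token: str, workspace_id: str) -> list:
    # Endpoint as used by duckrun; returns lakehouse display names
    url = f"https://api.fabric.microsoft.com/v1/workspaces/{workspace_id}/lakehouses"
    resp = requests.get(url, headers={"Authorization": f"Bearer {token}"})
    resp.raise_for_status()
    return [lh.get("displayName") for lh in resp.json().get("value", [])]
```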
@@ -626,19 +643,22 @@ class Duckrun:
             True if lakehouse exists or was created successfully, False otherwise
         """
         try:
-            #
+            # Get authentication token using enhanced auth system
+            from .auth import get_fabric_api_token
+            token = get_fabric_api_token()
+            if not token:
+                print("❌ Failed to authenticate for lakehouse creation")
+                return False
+
+            # Try to get current workspace ID if in notebook environment
+            workspace_id = None
             try:
                 import notebookutils  # type: ignore
-                token = notebookutils.credentials.getToken("pbi")
                 workspace_id = notebookutils.runtime.context.get("workspaceId")
             except ImportError:
-                #
-
-
-                credential = ChainedTokenCredential(AzureCliCredential(), InteractiveBrowserCredential())
-                token_obj = credential.get_token("https://api.fabric.microsoft.com/.default")
-                token = token_obj.token
-
+                pass  # Not in notebook environment
+
+            if not workspace_id:
                 # Get workspace ID by name
                 workspace_id = self._get_workspace_id_by_name(token, self.workspace)
                 if not workspace_id:
@@ -676,6 +696,45 @@ class Duckrun:
             print(f"❌ Error creating lakehouse '{lakehouse_name}': {e}")
             return False
 
+    def deploy(self, bim_url: str, dataset_name: Optional[str] = None,
+               wait_seconds: int = 5) -> int:
+        """
+        Deploy a semantic model from a BIM file using DirectLake mode.
+
+        Args:
+            bim_url: URL to the BIM file (e.g., GitHub raw URL)
+            dataset_name: Name for the semantic model (default: lakehouse_schema)
+            wait_seconds: Seconds to wait for permission propagation (default: 5)
+
+        Returns:
+            1 for success, 0 for failure
+
+        Examples:
+            dr = Duckrun.connect("My Workspace/My Lakehouse.lakehouse/dbo")
+
+            # Deploy with auto-generated name
+            dr.deploy("https://raw.githubusercontent.com/.../model.bim")
+
+            # Deploy with custom name
+            dr.deploy("https://raw.githubusercontent.com/.../model.bim",
+                      dataset_name="Sales Model")
+        """
+        from .semantic_model import deploy_semantic_model
+
+        # Auto-generate dataset name if not provided
+        if dataset_name is None:
+            dataset_name = f"{self.lakehouse_name}_{self.schema}"
+
+        # Call the deployment function (DirectLake only)
+        return deploy_semantic_model(
+            workspace_name_or_id=self.workspace,
+            lakehouse_name_or_id=self.lakehouse_name,
+            schema_name=self.schema,
+            dataset_name=dataset_name,
+            bim_url=bim_url,
+            wait_seconds=wait_seconds
+        )
+
     def _get_workspace_id_by_name(self, token: str, workspace_name: str) -> Optional[str]:
         """Helper method to get workspace ID from name"""
         try:
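Usage mirrors the README section above; note the auto-generated name convention and the integer return. A short sketch:

```python
import duckrun

con = duckrun.connect("Analytics/Sales.lakehouse/dbo")

# dataset_name defaults to f"{lakehouse_name}_{schema}" -> "Sales_dbo" here
result = con.deploy("https://raw.githubusercontent.com/user/repo/main/model.bim")
print("deployed" if result == 1 else "failed")
```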
@@ -718,28 +777,18 @@ class WorkspaceConnection:
             List of lakehouse names
         """
         try:
-            #
-
-
-
-
-
-
-
-
-
-
-
-            from azure.identity import AzureCliCredential, InteractiveBrowserCredential, ChainedTokenCredential
-            credential = ChainedTokenCredential(AzureCliCredential(), InteractiveBrowserCredential())
-            token_obj = credential.get_token("https://api.fabric.microsoft.com/.default")
-            token = token_obj.token
-
-            # Get workspace ID by name
-            workspace_id = self._get_workspace_id_by_name(token, self.workspace_name)
-            if not workspace_id:
-                print(f"Workspace '{self.workspace_name}' not found")
-                return []
+            # Get authentication token using enhanced auth system
+            from .auth import get_fabric_api_token
+            token = get_fabric_api_token()
+            if not token:
+                print("❌ Failed to authenticate for listing lakehouses")
+                return []
+
+            # Always resolve workspace name to ID, even in notebook environment
+            workspace_id = self._get_workspace_id_by_name(token, self.workspace_name)
+            if not workspace_id:
+                print(f"Workspace '{self.workspace_name}' not found")
+                return []
 
             # List lakehouses
             url = f"https://api.fabric.microsoft.com/v1/workspaces/{workspace_id}/lakehouses"
@@ -768,28 +817,18 @@ class WorkspaceConnection:
             True if lakehouse exists or was created successfully, False otherwise
         """
         try:
-            #
-
-
-
-
-
-
-
-
-
-
-
-            from azure.identity import AzureCliCredential, InteractiveBrowserCredential, ChainedTokenCredential
-            credential = ChainedTokenCredential(AzureCliCredential(), InteractiveBrowserCredential())
-            token_obj = credential.get_token("https://api.fabric.microsoft.com/.default")
-            token = token_obj.token
-
-            # Get workspace ID by name
-            workspace_id = self._get_workspace_id_by_name(token, self.workspace_name)
-            if not workspace_id:
-                print(f"Workspace '{self.workspace_name}' not found")
-                return False
+            # Get authentication token using enhanced auth system
+            from .auth import get_fabric_api_token
+            token = get_fabric_api_token()
+            if not token:
+                print("❌ Failed to authenticate for lakehouse creation")
+                return False
+
+            # Always resolve workspace name to ID, even in notebook environment
+            workspace_id = self._get_workspace_id_by_name(token, self.workspace_name)
+            if not workspace_id:
+                print(f"Workspace '{self.workspace_name}' not found")
+                return False
 
             # Check if lakehouse already exists
             url = f"https://api.fabric.microsoft.com/v1/workspaces/{workspace_id}/lakehouses"