duckrun 0.2.6__py3-none-any.whl → 0.2.8.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- duckrun/auth.py +240 -0
- duckrun/core.py +74 -83
- duckrun/files.py +14 -14
- duckrun/runner.py +12 -5
- duckrun/writer.py +12 -5
- {duckrun-0.2.6.dist-info → duckrun-0.2.8.dev0.dist-info}/METADATA +25 -7
- duckrun-0.2.8.dev0.dist-info/RECORD +13 -0
- duckrun-0.2.6.dist-info/RECORD +0 -12
- {duckrun-0.2.6.dist-info → duckrun-0.2.8.dev0.dist-info}/WHEEL +0 -0
- {duckrun-0.2.6.dist-info → duckrun-0.2.8.dev0.dist-info}/licenses/LICENSE +0 -0
- {duckrun-0.2.6.dist-info → duckrun-0.2.8.dev0.dist-info}/top_level.txt +0 -0
duckrun/auth.py
ADDED
@@ -0,0 +1,240 @@
+"""
+Enhanced authentication module for duckrun - supports multiple notebook environments
+"""
+import os
+from typing import Optional, Tuple
+
+
+def get_token() -> Optional[str]:
+    """
+    Smart authentication that works across multiple environments:
+    - Microsoft Fabric notebooks (uses notebookutils)
+    - Local environments with Azure CLI (uses CLI + browser fallback)
+    - Google Colab (uses device code flow)
+    - Other headless environments (uses device code flow)
+    - Existing token from environment (uses cached token)
+
+    Returns:
+        Azure Storage token string or None if authentication fails
+    """
+    # Check if we already have a cached token
+    token_env = os.environ.get("AZURE_STORAGE_TOKEN")
+    if token_env and token_env != "PLACEHOLDER_TOKEN_TOKEN_NOT_AVAILABLE":
+        print("✅ Using existing Azure Storage token")
+        return token_env
+
+    print("🔐 Starting Azure authentication...")
+
+    # Try Fabric notebook environment first
+    try:
+        import notebookutils  # type: ignore
+        print("📓 Microsoft Fabric notebook detected - using notebookutils")
+        token = notebookutils.credentials.getToken("pbi")
+        os.environ["AZURE_STORAGE_TOKEN"] = token
+        print("✅ Fabric notebook authentication successful!")
+        return token
+    except ImportError:
+        pass  # Not in Fabric notebook
+    except Exception as e:
+        print(f"⚠️ Fabric notebook authentication failed: {e}")
+
+    # Detect environment type for fallback authentication
+    try:
+        # Check if we're in Google Colab first
+        try:
+            import google.colab
+            print("🚀 Google Colab detected - using device code flow")
+            return _get_device_code_token()
+        except ImportError:
+            pass
+
+        # For all other environments (including VS Code), try Azure CLI first
+        # This includes local development, VS Code notebooks, etc.
+        print("🖥️ Local/VS Code environment detected - trying Azure CLI first, then browser fallback")
+        return _get_local_token()
+
+    except Exception as e:
+        print(f"❌ Authentication failed: {e}")
+        print("💡 Try refreshing and running again, or check your Azure permissions")
+        return None
+
+
+def _get_device_code_token() -> Optional[str]:
+    """Get token using device code flow for headless environments"""
+    try:
+        from azure.identity import DeviceCodeCredential
+
+        # Use Azure CLI client ID for device code flow
+        credential = DeviceCodeCredential(
+            client_id="04b07795-8ddb-461a-bbee-02f9e1bf7b46",  # Azure CLI client ID
+            tenant_id="common"
+        )
+
+        print("🔐 Follow the authentication prompts in your browser...")
+        token_obj = credential.get_token("https://storage.azure.com/.default")
+
+        os.environ["AZURE_STORAGE_TOKEN"] = token_obj.token
+        print("✅ Device code authentication successful!")
+        return token_obj.token
+
+    except Exception as e:
+        print(f"❌ Device code authentication failed: {e}")
+        return None
+
+
+def _get_local_token() -> Optional[str]:
+    """Get token using CLI first, then browser fallback for local environments"""
+    # First try Azure CLI directly
+    try:
+        from azure.identity import AzureCliCredential
+        print("🔐 Trying Azure CLI authentication...")
+
+        cli_credential = AzureCliCredential()
+        token_obj = cli_credential.get_token("https://storage.azure.com/.default")
+
+        os.environ["AZURE_STORAGE_TOKEN"] = token_obj.token
+        print("✅ Azure CLI authentication successful!")
+        return token_obj.token
+
+    except Exception as cli_error:
+        print(f"⚠️ Azure CLI authentication failed: {cli_error}")
+        print("🔐 Falling back to interactive browser authentication...")
+
+    # Fallback to interactive browser
+    try:
+        from azure.identity import InteractiveBrowserCredential
+
+        browser_credential = InteractiveBrowserCredential()
+        token_obj = browser_credential.get_token("https://storage.azure.com/.default")
+
+        os.environ["AZURE_STORAGE_TOKEN"] = token_obj.token
+        print("✅ Interactive browser authentication successful!")
+        return token_obj.token
+
+    except Exception as browser_error:
+        print(f"❌ Interactive browser authentication failed: {browser_error}")
+        return None
+
+
+def get_fabric_api_token() -> Optional[str]:
+    """
+    Get token for Fabric API operations (different scope than storage)
+
+    Returns:
+        Fabric API token string or None if authentication fails
+    """
+    print("🔐 Getting Fabric API token...")
+
+    # Try Fabric notebook environment first
+    try:
+        import notebookutils  # type: ignore
+        print("📓 Microsoft Fabric notebook detected - using notebookutils")
+        token = notebookutils.credentials.getToken("pbi")
+        print("✅ Fabric API token obtained!")
+        return token
+    except ImportError:
+        pass  # Not in Fabric notebook
+    except Exception as e:
+        print(f"⚠️ Fabric notebook token failed: {e}")
+
+    # Fallback to azure-identity for external environments
+    try:
+        # Check if we're in Google Colab
+        try:
+            import google.colab
+            print("💻 Using device code flow for Fabric API (Colab)")
+            from azure.identity import DeviceCodeCredential
+            credential = DeviceCodeCredential(
+                client_id="04b07795-8ddb-461a-bbee-02f9e1bf7b46",
+                tenant_id="common"
+            )
+        except ImportError:
+            # For all other environments, try CLI first then browser
+            print("🖥️ Using CLI + browser fallback for Fabric API")
+
+            # Try CLI first
+            try:
+                from azure.identity import AzureCliCredential
+                print("🔐 Trying Azure CLI for Fabric API...")
+                credential = AzureCliCredential()
+                token_obj = credential.get_token("https://api.fabric.microsoft.com/.default")
+                print("✅ Fabric API token obtained via Azure CLI!")
+                return token_obj.token
+            except Exception as cli_error:
+                print(f"⚠️ Azure CLI failed for Fabric API: {cli_error}")
+                print("🔐 Falling back to interactive browser for Fabric API...")
+                from azure.identity import InteractiveBrowserCredential
+                credential = InteractiveBrowserCredential()
+
+        token_obj = credential.get_token("https://api.fabric.microsoft.com/.default")
+        print("✅ Fabric API token obtained!")
+        return token_obj.token
+
+    except Exception as e:
+        print(f"❌ Fabric API authentication failed: {e}")
+        return None
+
+
+def authenticate_for_environment() -> Tuple[bool, Optional[str]]:
+    """
+    Main authentication entry point - detects environment and authenticates appropriately
+
+    Returns:
+        Tuple of (success: bool, token: Optional[str])
+    """
+    print("\n🔍 Detecting execution environment...")
+
+    # Check environment
+    try:
+        import notebookutils  # type: ignore
+        env_type = "Microsoft Fabric Notebook"
+    except ImportError:
+        try:
+            import google.colab
+            env_type = "Google Colab"
+        except ImportError:
+            # For all other environments (VS Code, local Python, etc.)
+            # we'll treat as local and try Azure CLI first
+            env_type = "Local/VS Code Environment"
+
+    print(f"📍 Environment: {env_type}")
+
+    token = get_token()
+    if token:
+        print(f"✅ Authentication successful for {env_type}")
+        return True, token
+    else:
+        print(f"❌ Authentication failed for {env_type}")
+        return False, None
+
+
+# For backward compatibility - expose the same interface as before
+def get_storage_token() -> str:
+    """
+    Backward compatible method - returns token or placeholder
+    """
+    token = get_token()
+    return token if token else "PLACEHOLDER_TOKEN_TOKEN_NOT_AVAILABLE"
+
+
+# Example usage function for testing
+def test_authentication():
+    """
+    Test authentication in current environment
+    """
+    print("=" * 60)
+    print("🧪 TESTING DUCKRUN AUTHENTICATION")
+    print("=" * 60)
+
+    success, token = authenticate_for_environment()
+
+    if success:
+        print("\n✅ Authentication test successful!")
+        print(f"Token length: {len(token) if token else 0} characters")
+        print(f"Token starts with: {token[:20] if token else 'None'}...")
+    else:
+        print("\n❌ Authentication test failed!")
+        print("Please check your Azure setup and permissions.")
+
+    print("=" * 60)
+    return success
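The new module gives duckrun a single fallback chain: cached `AZURE_STORAGE_TOKEN` → `notebookutils` → Colab device code → Azure CLI → interactive browser. A minimal sketch of how the added functions are meant to be called, assuming duckrun 0.2.8.dev0 and `azure-identity` are installed; it uses nothing beyond the functions shown above:

```python
# Minimal sketch, assuming duckrun 0.2.8.dev0 with the new auth module.
from duckrun.auth import get_token, get_fabric_api_token, test_authentication

# Storage-scoped token ("https://storage.azure.com/.default"); on success it
# is also cached in the AZURE_STORAGE_TOKEN environment variable.
storage_token = get_token()

# Fabric-REST-scoped token ("https://api.fabric.microsoft.com/.default"),
# the one core.py now uses for workspace and lakehouse API calls.
api_token = get_fabric_api_token()

# Interactive smoke test that prints the detected environment and token length.
ok = test_authentication()
```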
duckrun/core.py
CHANGED
@@ -196,18 +196,19 @@ class Duckrun:
         print(f"🔍 Resolving '{workspace_name}' workspace and '{lakehouse_name}' lakehouse to GUIDs (workspace has spaces)...")
 
         try:
-            # Get authentication token
+            # Get authentication token using enhanced auth system
+            from .auth import get_fabric_api_token
+            token = get_fabric_api_token()
+            if not token:
+                raise ValueError("Failed to obtain Fabric API token")
+
+            # Try to get current workspace ID if in notebook environment
+            current_workspace_id = None
             try:
                 import notebookutils  # type: ignore
-                token = notebookutils.credentials.getToken("pbi")
                 current_workspace_id = notebookutils.runtime.context.get("workspaceId")
             except ImportError:
-
-                # Fallback to azure-identity for external environments
-                from azure.identity import AzureCliCredential, InteractiveBrowserCredential, ChainedTokenCredential
-                credential = ChainedTokenCredential(AzureCliCredential(), InteractiveBrowserCredential())
-                token_obj = credential.get_token("https://api.fabric.microsoft.com/.default")
-                token = token_obj.token
+                pass  # Not in notebook environment
 
             # Resolve workspace name to ID
             if current_workspace_id:
@@ -302,19 +303,23 @@ class Duckrun:
         return WorkspaceConnection(workspace_name)
 
     def _get_storage_token(self):
-
+        from .auth import get_storage_token
+        return get_storage_token()
 
     def _create_onelake_secret(self):
         token = self._get_storage_token()
         if token != "PLACEHOLDER_TOKEN_TOKEN_NOT_AVAILABLE":
             self.con.sql(f"CREATE OR REPLACE SECRET onelake (TYPE AZURE, PROVIDER ACCESS_TOKEN, ACCESS_TOKEN '{token}')")
         else:
-
-            from
-
-            token
-
-
+            # Enhanced authentication - try all methods
+            from .auth import get_token
+            token = get_token()
+            if token:
+                os.environ["AZURE_STORAGE_TOKEN"] = token
+                self.con.sql(f"CREATE OR REPLACE SECRET onelake (TYPE AZURE, PROVIDER ACCESS_TOKEN, ACCESS_TOKEN '{token}')")
+            else:
+                # Final fallback to persistent secret
+                self.con.sql("CREATE OR REPLACE PERSISTENT SECRET onelake (TYPE azure, PROVIDER credential_chain, CHAIN 'cli', ACCOUNT_NAME 'onelake')")
 
     def _discover_tables_fast(self) -> List[Tuple[str, str]]:
         """
@@ -326,12 +331,12 @@ class Duckrun:
         """
         token = self._get_storage_token()
         if token == "PLACEHOLDER_TOKEN_TOKEN_NOT_AVAILABLE":
-            print("Authenticating with Azure for table discovery (
-            from
-
-
-
-
+            print("Authenticating with Azure for table discovery (detecting environment automatically)...")
+            from .auth import get_token
+            token = get_token()
+            if not token:
+                print("❌ Failed to authenticate for table discovery")
+                return []
 
         url = f"abfss://{self.workspace}@{self.storage_account}.dfs.fabric.microsoft.com/"
         store = AzureStore.from_url(url, bearer_token=token)
@@ -579,19 +584,22 @@ class Duckrun:
            List of lakehouse names
        """
        try:
-           #
+           # Get authentication token using enhanced auth system
+           from .auth import get_fabric_api_token
+           token = get_fabric_api_token()
+           if not token:
+               print("❌ Failed to authenticate for listing lakehouses")
+               return []
+
+           # Try to get current workspace ID if in notebook environment
+           workspace_id = None
            try:
                import notebookutils  # type: ignore
-               token = notebookutils.credentials.getToken("pbi")
                workspace_id = notebookutils.runtime.context.get("workspaceId")
            except ImportError:
-               #
-
-
-               credential = ChainedTokenCredential(AzureCliCredential(), InteractiveBrowserCredential())
-               token_obj = credential.get_token("https://api.fabric.microsoft.com/.default")
-               token = token_obj.token
-
+               pass  # Not in notebook environment
+
+           if not workspace_id:
                # Get workspace ID by name
                workspace_id = self._get_workspace_id_by_name(token, self.workspace)
                if not workspace_id:
@@ -626,19 +634,22 @@ class Duckrun:
            True if lakehouse exists or was created successfully, False otherwise
        """
        try:
-           #
+           # Get authentication token using enhanced auth system
+           from .auth import get_fabric_api_token
+           token = get_fabric_api_token()
+           if not token:
+               print("❌ Failed to authenticate for lakehouse creation")
+               return False
+
+           # Try to get current workspace ID if in notebook environment
+           workspace_id = None
            try:
                import notebookutils  # type: ignore
-               token = notebookutils.credentials.getToken("pbi")
                workspace_id = notebookutils.runtime.context.get("workspaceId")
            except ImportError:
-               #
-
-
-               credential = ChainedTokenCredential(AzureCliCredential(), InteractiveBrowserCredential())
-               token_obj = credential.get_token("https://api.fabric.microsoft.com/.default")
-               token = token_obj.token
-
+               pass  # Not in notebook environment
+
+           if not workspace_id:
                # Get workspace ID by name
                workspace_id = self._get_workspace_id_by_name(token, self.workspace)
                if not workspace_id:
@@ -718,28 +729,18 @@ class WorkspaceConnection:
            List of lakehouse names
        """
        try:
-           #
-
-
-
-
-
-
-
-
-
-
-
-           from azure.identity import AzureCliCredential, InteractiveBrowserCredential, ChainedTokenCredential
-           credential = ChainedTokenCredential(AzureCliCredential(), InteractiveBrowserCredential())
-           token_obj = credential.get_token("https://api.fabric.microsoft.com/.default")
-           token = token_obj.token
-
-           # Get workspace ID by name
-           workspace_id = self._get_workspace_id_by_name(token, self.workspace_name)
-           if not workspace_id:
-               print(f"Workspace '{self.workspace_name}' not found")
-               return []
+           # Get authentication token using enhanced auth system
+           from .auth import get_fabric_api_token
+           token = get_fabric_api_token()
+           if not token:
+               print("❌ Failed to authenticate for listing lakehouses")
+               return []
+
+           # Always resolve workspace name to ID, even in notebook environment
+           workspace_id = self._get_workspace_id_by_name(token, self.workspace_name)
+           if not workspace_id:
+               print(f"Workspace '{self.workspace_name}' not found")
+               return []
 
            # List lakehouses
            url = f"https://api.fabric.microsoft.com/v1/workspaces/{workspace_id}/lakehouses"
@@ -768,28 +769,18 @@ class WorkspaceConnection:
            True if lakehouse exists or was created successfully, False otherwise
        """
        try:
-           #
-
-
-
-
-
-
-
-
-
-
-
-           from azure.identity import AzureCliCredential, InteractiveBrowserCredential, ChainedTokenCredential
-           credential = ChainedTokenCredential(AzureCliCredential(), InteractiveBrowserCredential())
-           token_obj = credential.get_token("https://api.fabric.microsoft.com/.default")
-           token = token_obj.token
-
-           # Get workspace ID by name
-           workspace_id = self._get_workspace_id_by_name(token, self.workspace_name)
-           if not workspace_id:
-               print(f"Workspace '{self.workspace_name}' not found")
-               return False
+           # Get authentication token using enhanced auth system
+           from .auth import get_fabric_api_token
+           token = get_fabric_api_token()
+           if not token:
+               print("❌ Failed to authenticate for lakehouse creation")
+               return False
+
+           # Always resolve workspace name to ID, even in notebook environment
+           workspace_id = self._get_workspace_id_by_name(token, self.workspace_name)
+           if not workspace_id:
+               print(f"Workspace '{self.workspace_name}' not found")
+               return False
 
            # Check if lakehouse already exists
            url = f"https://api.fabric.microsoft.com/v1/workspaces/{workspace_id}/lakehouses"
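The `_create_onelake_secret` hunk above now ends in a three-step chain: reuse a valid token, otherwise fetch one via `duckrun.auth.get_token()`, otherwise hand credential discovery to DuckDB itself. A rough standalone illustration of the two SQL paths, assuming DuckDB with the `azure` extension is available; the bare connection and the `token` placeholder are illustrative, not duckrun internals:

```python
import duckdb

con = duckdb.connect()
token = None  # stand-in for duckrun.auth.get_token() coming back empty

if token:
    # Session-scoped secret built from the freshly acquired bearer token
    con.sql(f"CREATE OR REPLACE SECRET onelake "
            f"(TYPE AZURE, PROVIDER ACCESS_TOKEN, ACCESS_TOKEN '{token}')")
else:
    # Last resort from the diff: let DuckDB's credential_chain provider
    # drive the Azure CLI on its own
    con.sql("CREATE OR REPLACE PERSISTENT SECRET onelake "
            "(TYPE azure, PROVIDER credential_chain, CHAIN 'cli', ACCOUNT_NAME 'onelake')")
```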
duckrun/files.py
CHANGED
@@ -41,15 +41,15 @@ def copy(duckrun_instance, local_folder: str, remote_folder: str,
         print(f"❌ Path is not a directory: {local_folder}")
         return False
 
-    # Get Azure token
+    # Get Azure token using enhanced auth system
+    from .auth import get_token
     token = duckrun_instance._get_storage_token()
     if token == "PLACEHOLDER_TOKEN_TOKEN_NOT_AVAILABLE":
-        print("Authenticating with Azure for file upload (
-
-
-
-
-        os.environ["AZURE_STORAGE_TOKEN"] = token
+        print("Authenticating with Azure for file upload (detecting environment automatically)...")
+        token = get_token()
+        if not token:
+            print("❌ Failed to authenticate for file upload")
+            return False
 
     # Setup OneLake Files URL (use correct format without .Lakehouse suffix)
     files_base_url = duckrun_instance.files_base_url
@@ -150,15 +150,15 @@ def download(duckrun_instance, remote_folder: str = "", local_folder: str = "./d
     # Download only CSV files from a specific subfolder
     dr.download("daily_reports", "./reports", ['.csv'])
     """
-    # Get Azure token
+    # Get Azure token using enhanced auth system
+    from .auth import get_token
     token = duckrun_instance._get_storage_token()
     if token == "PLACEHOLDER_TOKEN_TOKEN_NOT_AVAILABLE":
-        print("Authenticating with Azure for file download (
-
-
-
-
-        os.environ["AZURE_STORAGE_TOKEN"] = token
+        print("Authenticating with Azure for file download (detecting environment automatically)...")
+        token = get_token()
+        if not token:
+            print("❌ Failed to authenticate for file download")
+            return False
 
     # Setup OneLake Files URL (use correct format without .Lakehouse suffix)
     files_base_url = duckrun_instance.files_base_url
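Both helpers now return `False` early instead of proceeding with the placeholder token. Calling conventions are unchanged; a short sketch based on the `download()` docstring visible in the hunk above, assuming `copy()` and `download()` are exposed on the connection object as the docstring's `dr.download(...)` example suggests (the workspace, lakehouse, and folder names are illustrative):

```python
import duckrun

con = duckrun.connect("My Workspace/data.lakehouse/dbo")

# Upload a local folder into the lakehouse Files area (illustrative paths)
con.copy("./exports", "landing/exports")

# Download only CSV files from a specific subfolder, taken verbatim from
# the download() docstring shown in the diff
con.download("daily_reports", "./reports", ['.csv'])
```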
duckrun/runner.py
CHANGED
@@ -15,7 +15,7 @@ def _build_write_deltalake_args(path, df, mode, schema_mode=None, partition_by=N
     """
     Build arguments for write_deltalake based on requirements:
     - If schema_mode='merge': use rust engine (no row group params)
-    - Otherwise: use pyarrow engine with row group optimization
+    - Otherwise: use pyarrow engine with row group optimization (if supported)
     """
     args = {
         'table_or_uri': path,
@@ -33,10 +33,17 @@ def _build_write_deltalake_args(path, df, mode, schema_mode=None, partition_by=N
         args['schema_mode'] = 'merge'
         args['engine'] = 'rust'
     else:
-        #
-
-
-
+        # Try to use pyarrow engine with row group optimization
+        # Check if row group parameters are supported by inspecting function signature
+        import inspect
+        sig = inspect.signature(write_deltalake)
+
+        if 'max_rows_per_file' in sig.parameters:
+            # Older deltalake version - use row group optimization
+            args['max_rows_per_file'] = RG
+            args['max_rows_per_group'] = RG
+            args['min_rows_per_group'] = RG
+        # For newer versions, just use default parameters
 
     return args
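writer.py below gets the identical guard. The underlying trick is worth isolating: probe a third-party function's signature before passing version-dependent keyword arguments, rather than hard-pinning the dependency. A self-contained sketch of the same pattern using a stand-in function; with deltalake installed, `deltalake.write_deltalake` would take its place, and the row-group value is illustrative:

```python
import inspect

def row_group_kwargs(func, rg: int) -> dict:
    """Return row-group tuning kwargs only if this version of func accepts them."""
    if 'max_rows_per_file' in inspect.signature(func).parameters:
        # Older pyarrow-engine deltalake: row group sizing is supported
        return {
            'max_rows_per_file': rg,
            'max_rows_per_group': rg,
            'min_rows_per_group': rg,
        }
    return {}  # newer versions: fall back to defaults

# Stand-in with the old-style signature, for demonstration only
def fake_write(table_or_uri, data, max_rows_per_file=None,
               max_rows_per_group=None, min_rows_per_group=None):
    pass

print(row_group_kwargs(fake_write, 8_000_000))
# -> {'max_rows_per_file': 8000000, 'max_rows_per_group': 8000000, 'min_rows_per_group': 8000000}
```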
duckrun/writer.py
CHANGED
@@ -12,7 +12,7 @@ def _build_write_deltalake_args(path, df, mode, schema_mode=None, partition_by=N
     """
     Build arguments for write_deltalake based on requirements:
     - If schema_mode='merge': use rust engine (no row group params)
-    - Otherwise: use pyarrow engine with row group optimization
+    - Otherwise: use pyarrow engine with row group optimization (if supported)
     """
     args = {
         'table_or_uri': path,
@@ -30,10 +30,17 @@ def _build_write_deltalake_args(path, df, mode, schema_mode=None, partition_by=N
         args['schema_mode'] = 'merge'
         args['engine'] = 'rust'
     else:
-        #
-
-
-
+        # Try to use pyarrow engine with row group optimization
+        # Check if row group parameters are supported by inspecting function signature
+        import inspect
+        sig = inspect.signature(write_deltalake)
+
+        if 'max_rows_per_file' in sig.parameters:
+            # Older deltalake version - use row group optimization
+            args['max_rows_per_file'] = RG
+            args['max_rows_per_group'] = RG
+            args['min_rows_per_group'] = RG
+        # For newer versions, just use default parameters
 
     return args
{duckrun-0.2.6.dist-info → duckrun-0.2.8.dev0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: duckrun
-Version: 0.2.6
+Version: 0.2.8.dev0
 Summary: Lakehouse task runner powered by DuckDB for Microsoft Fabric
 Author: mim
 License: MIT
@@ -26,7 +26,8 @@ A helper package for stuff that made my life easier when working with Fabric Pyt
 
 **Requirements:**
 - Lakehouse must have a schema (e.g., `dbo`, `sales`, `analytics`)
-- **Workspace
+- **Workspace names with spaces are fully supported!** ✅
+
 
 **Delta Lake Version:** This package uses an older version of deltalake to maintain row size control capabilities, which is crucial for Power BI performance optimization. The newer Rust-based deltalake versions don't yet support the row group size parameters that are essential for optimal DirectLake performance.
 
@@ -47,23 +48,40 @@ pip install duckrun[local]
 
 ## Quick Start
 
+### Simple Example for New Users
+
+```python
+import duckrun
+
+# Connect to a workspace and manage lakehouses
+con = duckrun.connect('My Workspace')
+con.list_lakehouses()  # See what lakehouses exist
+con.create_lakehouse_if_not_exists('data')  # Create if needed
+
+# Connect to a specific lakehouse and query data
+con = duckrun.connect("My Workspace/data.lakehouse/dbo")
+con.sql("SELECT * FROM my_table LIMIT 10").show()
+```
+
+### Full Feature Overview
+
 ```python
 import duckrun
 
 # 1. Workspace Management (list and create lakehouses)
 ws = duckrun.connect("My Workspace")
 lakehouses = ws.list_lakehouses()  # Returns list of lakehouse names
-ws.create_lakehouse_if_not_exists("
+ws.create_lakehouse_if_not_exists("New_Lakehouse")
 
 # 2. Connect to lakehouse with a specific schema
-con = duckrun.connect("My Workspace/
+con = duckrun.connect("My Workspace/MyLakehouse.lakehouse/dbo")
 
-#
-con = duckrun.connect("Data Analytics/
+# Workspace names with spaces are supported!
+con = duckrun.connect("Data Analytics/SalesData.lakehouse/analytics")
 
 # Schema defaults to 'dbo' if not specified (scans all schemas)
 # ⚠️ WARNING: Scanning all schemas can be slow for large lakehouses!
-con = duckrun.connect("My Workspace/
+con = duckrun.connect("My Workspace/My_Lakehouse.lakehouse")
 
 # 3. Explore data
 con.sql("SELECT * FROM my_table LIMIT 10").show()
duckrun-0.2.8.dev0.dist-info/RECORD
ADDED
@@ -0,0 +1,13 @@
+duckrun/__init__.py,sha256=XA85pL2vK1AkmBic8e7WxeqNvcd6SjFX4zsQpImDO6E,230
+duckrun/auth.py,sha256=qColLkvmk8S_qRAXLMGh_TgVeSPkv0j15dv55wgrX1o,9139
+duckrun/core.py,sha256=Ad7MgsWlEgW-qWddfjLsp72YvNxk_VmSC8_Q0qBQzpo,37335
+duckrun/files.py,sha256=Fvdjg3DyHJzIVzKo8M_j-eGz4zU61lOB38Y_onbQJkI,10137
+duckrun/lakehouse.py,sha256=j--Z3zo8AOWt1GF9VzRosmmTAy6ey2D0LVubti58twU,14109
+duckrun/runner.py,sha256=yrDxfy1RVkb8iK9GKGmIFZHzCvcO_0GVQlbng7Vw_iM,14171
+duckrun/stats.py,sha256=2FTqoQNVjD84-H1HjStHxZkOpAGKXS79M55B00pOlok,9804
+duckrun/writer.py,sha256=3UwuoH4yjcomBaTbRXOSjlA82jRhhjErkOWDCX7K7mw,6595
+duckrun-0.2.8.dev0.dist-info/licenses/LICENSE,sha256=-DeQQwdbCbkB4507ZF3QbocysB-EIjDtaLexvqRkGZc,1083
+duckrun-0.2.8.dev0.dist-info/METADATA,sha256=nr-rrHcmW7R2aNN2pVAsn8VF-4U-mBo30vhrdlaSYvE,19277
+duckrun-0.2.8.dev0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+duckrun-0.2.8.dev0.dist-info/top_level.txt,sha256=BknMEwebbUHrVAp3SC92ps8MPhK7XSYsaogTvi_DmEU,8
+duckrun-0.2.8.dev0.dist-info/RECORD,,
duckrun-0.2.6.dist-info/RECORD
DELETED
@@ -1,12 +0,0 @@
-duckrun/__init__.py,sha256=XA85pL2vK1AkmBic8e7WxeqNvcd6SjFX4zsQpImDO6E,230
-duckrun/core.py,sha256=Y4-5H83Xw0mZa12QM5pcC7qOPidrDFASLcGIoUW3zwY,39394
-duckrun/files.py,sha256=piWRU5w9jHrW-wuV4Gf-SKY_jhFv9eflxgWO8AZCQTI,10495
-duckrun/lakehouse.py,sha256=j--Z3zo8AOWt1GF9VzRosmmTAy6ey2D0LVubti58twU,14109
-duckrun/runner.py,sha256=XsQqWlesFD2cuhH2gsQj3Astg0XN7xhW15WPmr8D65I,13797
-duckrun/stats.py,sha256=2FTqoQNVjD84-H1HjStHxZkOpAGKXS79M55B00pOlok,9804
-duckrun/writer.py,sha256=eWrGtDQTbXi8H3sSt2WucYTdEQUjK97KmQxzCbqAuMs,6221
-duckrun-0.2.6.dist-info/licenses/LICENSE,sha256=-DeQQwdbCbkB4507ZF3QbocysB-EIjDtaLexvqRkGZc,1083
-duckrun-0.2.6.dist-info/METADATA,sha256=i8kvmnqpsddtJGq5GD44SFif0YTN-UFFviPyQZMPHn0,18799
-duckrun-0.2.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-duckrun-0.2.6.dist-info/top_level.txt,sha256=BknMEwebbUHrVAp3SC92ps8MPhK7XSYsaogTvi_DmEU,8
-duckrun-0.2.6.dist-info/RECORD,,
{duckrun-0.2.6.dist-info → duckrun-0.2.8.dev0.dist-info}/WHEEL
File without changes
{duckrun-0.2.6.dist-info → duckrun-0.2.8.dev0.dist-info}/licenses/LICENSE
File without changes
{duckrun-0.2.6.dist-info → duckrun-0.2.8.dev0.dist-info}/top_level.txt
File without changes