duckrun 0.2.6__tar.gz → 0.2.8.dev0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: duckrun
3
- Version: 0.2.6
3
+ Version: 0.2.8.dev0
4
4
  Summary: Lakehouse task runner powered by DuckDB for Microsoft Fabric
5
5
  Author: mim
6
6
  License: MIT
@@ -26,7 +26,8 @@ A helper package for stuff that made my life easier when working with Fabric Pyt
26
26
 
27
27
  **Requirements:**
28
28
  - Lakehouse must have a schema (e.g., `dbo`, `sales`, `analytics`)
29
- - **Workspace and lakehouse names with spaces are now fully supported!** ✅
29
+ - **Workspace names with spaces are fully supported!** ✅
30
+
30
31
 
31
32
  **Delta Lake Version:** This package uses an older version of deltalake to maintain row size control capabilities, which is crucial for Power BI performance optimization. The newer Rust-based deltalake versions don't yet support the row group size parameters that are essential for optimal DirectLake performance.
32
33
 
@@ -47,23 +48,40 @@ pip install duckrun[local]
47
48
 
48
49
  ## Quick Start
49
50
 
51
+ ### Simple Example for New Users
52
+
53
+ ```python
54
+ import duckrun
55
+
56
+ # Connect to a workspace and manage lakehouses
57
+ con = duckrun.connect('My Workspace')
58
+ con.list_lakehouses() # See what lakehouses exist
59
+ con.create_lakehouse_if_not_exists('data') # Create if needed
60
+
61
+ # Connect to a specific lakehouse and query data
62
+ con = duckrun.connect("My Workspace/data.lakehouse/dbo")
63
+ con.sql("SELECT * FROM my_table LIMIT 10").show()
64
+ ```
65
+
66
+ ### Full Feature Overview
67
+
50
68
  ```python
51
69
  import duckrun
52
70
 
53
71
  # 1. Workspace Management (list and create lakehouses)
54
72
  ws = duckrun.connect("My Workspace")
55
73
  lakehouses = ws.list_lakehouses() # Returns list of lakehouse names
56
- ws.create_lakehouse_if_not_exists("New Lakehouse")
74
+ ws.create_lakehouse_if_not_exists("New_Lakehouse")
57
75
 
58
76
  # 2. Connect to lakehouse with a specific schema
59
- con = duckrun.connect("My Workspace/My Lakehouse.lakehouse/dbo")
77
+ con = duckrun.connect("My Workspace/MyLakehouse.lakehouse/dbo")
60
78
 
61
- # Works with workspace names containing spaces!
62
- con = duckrun.connect("Data Analytics/Sales Data.lakehouse/analytics")
79
+ # Workspace names with spaces are supported!
80
+ con = duckrun.connect("Data Analytics/SalesData.lakehouse/analytics")
63
81
 
64
82
  # Schema defaults to 'dbo' if not specified (scans all schemas)
65
83
  # ⚠️ WARNING: Scanning all schemas can be slow for large lakehouses!
66
- con = duckrun.connect("My Workspace/My Lakehouse.lakehouse")
84
+ con = duckrun.connect("My Workspace/My_Lakehouse.lakehouse")
67
85
 
68
86
  # 3. Explore data
69
87
  con.sql("SELECT * FROM my_table LIMIT 10").show()
@@ -6,7 +6,8 @@ A helper package for stuff that made my life easier when working with Fabric Pyt
6
6
 
7
7
  **Requirements:**
8
8
  - Lakehouse must have a schema (e.g., `dbo`, `sales`, `analytics`)
9
- - **Workspace and lakehouse names with spaces are now fully supported!** ✅
9
+ - **Workspace names with spaces are fully supported!** ✅
10
+
10
11
 
11
12
  **Delta Lake Version:** This package uses an older version of deltalake to maintain row size control capabilities, which is crucial for Power BI performance optimization. The newer Rust-based deltalake versions don't yet support the row group size parameters that are essential for optimal DirectLake performance.
12
13
 
@@ -27,23 +28,40 @@ pip install duckrun[local]
27
28
 
28
29
  ## Quick Start
29
30
 
31
+ ### Simple Example for New Users
32
+
33
+ ```python
34
+ import duckrun
35
+
36
+ # Connect to a workspace and manage lakehouses
37
+ con = duckrun.connect('My Workspace')
38
+ con.list_lakehouses() # See what lakehouses exist
39
+ con.create_lakehouse_if_not_exists('data') # Create if needed
40
+
41
+ # Connect to a specific lakehouse and query data
42
+ con = duckrun.connect("My Workspace/data.lakehouse/dbo")
43
+ con.sql("SELECT * FROM my_table LIMIT 10").show()
44
+ ```
45
+
46
+ ### Full Feature Overview
47
+
30
48
  ```python
31
49
  import duckrun
32
50
 
33
51
  # 1. Workspace Management (list and create lakehouses)
34
52
  ws = duckrun.connect("My Workspace")
35
53
  lakehouses = ws.list_lakehouses() # Returns list of lakehouse names
36
- ws.create_lakehouse_if_not_exists("New Lakehouse")
54
+ ws.create_lakehouse_if_not_exists("New_Lakehouse")
37
55
 
38
56
  # 2. Connect to lakehouse with a specific schema
39
- con = duckrun.connect("My Workspace/My Lakehouse.lakehouse/dbo")
57
+ con = duckrun.connect("My Workspace/MyLakehouse.lakehouse/dbo")
40
58
 
41
- # Works with workspace names containing spaces!
42
- con = duckrun.connect("Data Analytics/Sales Data.lakehouse/analytics")
59
+ # Workspace names with spaces are supported!
60
+ con = duckrun.connect("Data Analytics/SalesData.lakehouse/analytics")
43
61
 
44
62
  # Schema defaults to 'dbo' if not specified (scans all schemas)
45
63
  # ⚠️ WARNING: Scanning all schemas can be slow for large lakehouses!
46
- con = duckrun.connect("My Workspace/My Lakehouse.lakehouse")
64
+ con = duckrun.connect("My Workspace/My_Lakehouse.lakehouse")
47
65
 
48
66
  # 3. Explore data
49
67
  con.sql("SELECT * FROM my_table LIMIT 10").show()
@@ -0,0 +1,240 @@
1
+ """
2
+ Enhanced authentication module for duckrun - supports multiple notebook environments
3
+ """
4
+ import os
5
+ from typing import Optional, Tuple
6
+
7
+
8
def get_token() -> Optional[str]:
    """
    Resolve an Azure Storage bearer token, adapting to the runtime environment.

    Order of attempts:
      1. Cached token from the AZURE_STORAGE_TOKEN environment variable.
      2. Microsoft Fabric notebook (notebookutils credential helper).
      3. Google Colab (device code flow).
      4. Any other environment (Azure CLI first, then interactive browser).

    Returns:
        Azure Storage token string, or None when every method fails.
    """
    # Fast path: a previously obtained token cached in the environment.
    cached = os.environ.get("AZURE_STORAGE_TOKEN")
    if cached and cached != "PLACEHOLDER_TOKEN_TOKEN_NOT_AVAILABLE":
        print("✅ Using existing Azure Storage token")
        return cached

    print("🔐 Starting Azure authentication...")

    # Microsoft Fabric notebooks expose notebookutils; prefer it when present.
    try:
        import notebookutils  # type: ignore
        print("📓 Microsoft Fabric notebook detected - using notebookutils")
        fabric_token = notebookutils.credentials.getToken("pbi")
        os.environ["AZURE_STORAGE_TOKEN"] = fabric_token
        print("✅ Fabric notebook authentication successful!")
        return fabric_token
    except ImportError:
        pass  # notebookutils missing -> not running inside Fabric
    except Exception as e:
        print(f"⚠️ Fabric notebook authentication failed: {e}")

    try:
        # Colab kernels have no local browser, so use the device code flow.
        try:
            import google.colab  # noqa: F401
            print("🚀 Google Colab detected - using device code flow")
            return _get_device_code_token()
        except ImportError:
            pass

        # Everything else (local shells, VS Code notebooks, ...): CLI then browser.
        print("🖥️ Local/VS Code environment detected - trying Azure CLI first, then browser fallback")
        return _get_local_token()

    except Exception as e:
        print(f"❌ Authentication failed: {e}")
        print("💡 Try refreshing and running again, or check your Azure permissions")
        return None
60
+
61
+
62
def _get_device_code_token() -> Optional[str]:
    """Authenticate via the device code flow (for headless hosts such as Colab).

    Caches the token in AZURE_STORAGE_TOKEN on success.

    Returns:
        Storage token string, or None on failure.
    """
    try:
        from azure.identity import DeviceCodeCredential

        # Reuse the well-known Azure CLI application ID so users need no
        # app registration of their own.
        credential = DeviceCodeCredential(
            client_id="04b07795-8ddb-461a-bbee-02f9e1bf7b46",  # Azure CLI client ID
            tenant_id="common"
        )

        print("🔐 Follow the authentication prompts in your browser...")
        result = credential.get_token("https://storage.azure.com/.default")

        os.environ["AZURE_STORAGE_TOKEN"] = result.token
        print("✅ Device code authentication successful!")
        return result.token

    except Exception as e:
        print(f"❌ Device code authentication failed: {e}")
        return None
83
+
84
+
85
def _get_local_token() -> Optional[str]:
    """Authenticate on a local machine: Azure CLI first, browser as fallback.

    Caches the token in AZURE_STORAGE_TOKEN on success.

    Returns:
        Storage token string, or None when both methods fail.
    """
    # Attempt 1: a logged-in Azure CLI session (no user interaction needed).
    try:
        from azure.identity import AzureCliCredential
        print("🔐 Trying Azure CLI authentication...")

        result = AzureCliCredential().get_token("https://storage.azure.com/.default")

        os.environ["AZURE_STORAGE_TOKEN"] = result.token
        print("✅ Azure CLI authentication successful!")
        return result.token

    except Exception as cli_error:
        print(f"⚠️ Azure CLI authentication failed: {cli_error}")
        print("🔐 Falling back to interactive browser authentication...")

    # Attempt 2: open a browser window for interactive sign-in.
    try:
        from azure.identity import InteractiveBrowserCredential

        result = InteractiveBrowserCredential().get_token("https://storage.azure.com/.default")

        os.environ["AZURE_STORAGE_TOKEN"] = result.token
        print("✅ Interactive browser authentication successful!")
        return result.token

    except Exception as browser_error:
        print(f"❌ Interactive browser authentication failed: {browser_error}")
        return None
117
+
118
+
119
def get_fabric_api_token() -> Optional[str]:
    """
    Obtain a token scoped to the Fabric REST API (a different scope than
    the storage token produced by get_token).

    Tries, in order: Fabric notebookutils, device code flow (Colab),
    Azure CLI, and finally an interactive browser login.

    Returns:
        Fabric API token string or None if authentication fails
    """
    print("🔐 Getting Fabric API token...")

    # Inside a Fabric notebook the runtime hands us a token directly.
    try:
        import notebookutils  # type: ignore
        print("📓 Microsoft Fabric notebook detected - using notebookutils")
        notebook_token = notebookutils.credentials.getToken("pbi")
        print("✅ Fabric API token obtained!")
        return notebook_token
    except ImportError:
        pass  # notebookutils missing -> not running inside Fabric
    except Exception as e:
        print(f"⚠️ Fabric notebook token failed: {e}")

    # Outside Fabric, fall back to azure-identity credentials.
    try:
        try:
            # Colab: no local browser, so prepare a device code credential.
            import google.colab  # noqa: F401
            print("💻 Using device code flow for Fabric API (Colab)")
            from azure.identity import DeviceCodeCredential
            credential = DeviceCodeCredential(
                client_id="04b07795-8ddb-461a-bbee-02f9e1bf7b46",
                tenant_id="common"
            )
        except ImportError:
            # All other environments: CLI first, browser credential as fallback.
            print("🖥️ Using CLI + browser fallback for Fabric API")

            try:
                from azure.identity import AzureCliCredential
                print("🔐 Trying Azure CLI for Fabric API...")
                cli_result = AzureCliCredential().get_token("https://api.fabric.microsoft.com/.default")
                print("✅ Fabric API token obtained via Azure CLI!")
                return cli_result.token
            except Exception as cli_error:
                print(f"⚠️ Azure CLI failed for Fabric API: {cli_error}")
                print("🔐 Falling back to interactive browser for Fabric API...")
                from azure.identity import InteractiveBrowserCredential
                credential = InteractiveBrowserCredential()

        api_result = credential.get_token("https://api.fabric.microsoft.com/.default")
        print("✅ Fabric API token obtained!")
        return api_result.token

    except Exception as e:
        print(f"❌ Fabric API authentication failed: {e}")
        return None
176
+
177
+
178
def authenticate_for_environment() -> Tuple[bool, Optional[str]]:
    """
    Main authentication entry point: report the detected environment, then
    delegate the actual token acquisition to get_token().

    Returns:
        Tuple of (success: bool, token: Optional[str])
    """
    print("\n🔍 Detecting execution environment...")

    # Environment detection by probing for environment-specific modules.
    try:
        import notebookutils  # type: ignore
        env_type = "Microsoft Fabric Notebook"
    except ImportError:
        try:
            import google.colab  # noqa: F401
            env_type = "Google Colab"
        except ImportError:
            # Anything else (VS Code, plain local Python, ...) is treated
            # as local; get_token() will try Azure CLI first there.
            env_type = "Local/VS Code Environment"

    print(f"📍 Environment: {env_type}")

    token = get_token()
    if token:
        print(f"✅ Authentication successful for {env_type}")
        return True, token

    print(f"❌ Authentication failed for {env_type}")
    return False, None
209
+
210
+
211
+ # For backward compatibility - expose the same interface as before
212
def get_storage_token() -> str:
    """
    Backward compatible helper: always returns a string.

    Falls back to the placeholder sentinel when no token could be obtained,
    matching the pre-existing interface expected by callers.
    """
    token = get_token()
    if token:
        return token
    return "PLACEHOLDER_TOKEN_TOKEN_NOT_AVAILABLE"
218
+
219
+
220
+ # Example usage function for testing
221
def test_authentication():
    """
    Run the full authentication flow in the current environment and print
    a human-readable report (intended for manual smoke testing).

    Returns:
        True when authentication succeeded, False otherwise.
    """
    banner = "=" * 60
    print(banner)
    print("🧪 TESTING DUCKRUN AUTHENTICATION")
    print(banner)

    success, token = authenticate_for_environment()

    if success:
        print("\n✅ Authentication test successful!")
        print(f"Token length: {len(token) if token else 0} characters")
        print(f"Token starts with: {token[:20] if token else 'None'}...")
    else:
        print("\n❌ Authentication test failed!")
        print("Please check your Azure setup and permissions.")

    print(banner)
    return success
@@ -196,18 +196,19 @@ class Duckrun:
196
196
  print(f"🔍 Resolving '{workspace_name}' workspace and '{lakehouse_name}' lakehouse to GUIDs (workspace has spaces)...")
197
197
 
198
198
  try:
199
- # Get authentication token (try notebook environment first, then azure-identity)
199
+ # Get authentication token using enhanced auth system
200
+ from .auth import get_fabric_api_token
201
+ token = get_fabric_api_token()
202
+ if not token:
203
+ raise ValueError("Failed to obtain Fabric API token")
204
+
205
+ # Try to get current workspace ID if in notebook environment
206
+ current_workspace_id = None
200
207
  try:
201
208
  import notebookutils # type: ignore
202
- token = notebookutils.credentials.getToken("pbi")
203
209
  current_workspace_id = notebookutils.runtime.context.get("workspaceId")
204
210
  except ImportError:
205
- current_workspace_id = None
206
- # Fallback to azure-identity for external environments
207
- from azure.identity import AzureCliCredential, InteractiveBrowserCredential, ChainedTokenCredential
208
- credential = ChainedTokenCredential(AzureCliCredential(), InteractiveBrowserCredential())
209
- token_obj = credential.get_token("https://api.fabric.microsoft.com/.default")
210
- token = token_obj.token
211
+ pass # Not in notebook environment
211
212
 
212
213
  # Resolve workspace name to ID
213
214
  if current_workspace_id:
@@ -302,19 +303,23 @@ class Duckrun:
302
303
  return WorkspaceConnection(workspace_name)
303
304
 
304
305
  def _get_storage_token(self):
305
- return os.environ.get("AZURE_STORAGE_TOKEN", "PLACEHOLDER_TOKEN_TOKEN_NOT_AVAILABLE")
306
+ from .auth import get_storage_token
307
+ return get_storage_token()
306
308
 
307
309
  def _create_onelake_secret(self):
308
310
  token = self._get_storage_token()
309
311
  if token != "PLACEHOLDER_TOKEN_TOKEN_NOT_AVAILABLE":
310
312
  self.con.sql(f"CREATE OR REPLACE SECRET onelake (TYPE AZURE, PROVIDER ACCESS_TOKEN, ACCESS_TOKEN '{token}')")
311
313
  else:
312
- print("Authenticating with Azure (trying CLI, will fallback to browser if needed)...")
313
- from azure.identity import AzureCliCredential, InteractiveBrowserCredential, ChainedTokenCredential
314
- credential = ChainedTokenCredential(AzureCliCredential(), InteractiveBrowserCredential())
315
- token = credential.get_token("https://storage.azure.com/.default")
316
- os.environ["AZURE_STORAGE_TOKEN"] = token.token
317
- self.con.sql("CREATE OR REPLACE PERSISTENT SECRET onelake (TYPE azure, PROVIDER credential_chain, CHAIN 'cli', ACCOUNT_NAME 'onelake')")
314
+ # Enhanced authentication - try all methods
315
+ from .auth import get_token
316
+ token = get_token()
317
+ if token:
318
+ os.environ["AZURE_STORAGE_TOKEN"] = token
319
+ self.con.sql(f"CREATE OR REPLACE SECRET onelake (TYPE AZURE, PROVIDER ACCESS_TOKEN, ACCESS_TOKEN '{token}')")
320
+ else:
321
+ # Final fallback to persistent secret
322
+ self.con.sql("CREATE OR REPLACE PERSISTENT SECRET onelake (TYPE azure, PROVIDER credential_chain, CHAIN 'cli', ACCOUNT_NAME 'onelake')")
318
323
 
319
324
  def _discover_tables_fast(self) -> List[Tuple[str, str]]:
320
325
  """
@@ -326,12 +331,12 @@ class Duckrun:
326
331
  """
327
332
  token = self._get_storage_token()
328
333
  if token == "PLACEHOLDER_TOKEN_TOKEN_NOT_AVAILABLE":
329
- print("Authenticating with Azure for table discovery (trying CLI, will fallback to browser if needed)...")
330
- from azure.identity import AzureCliCredential, InteractiveBrowserCredential, ChainedTokenCredential
331
- credential = ChainedTokenCredential(AzureCliCredential(), InteractiveBrowserCredential())
332
- token_obj = credential.get_token("https://storage.azure.com/.default")
333
- token = token_obj.token
334
- os.environ["AZURE_STORAGE_TOKEN"] = token
334
+ print("Authenticating with Azure for table discovery (detecting environment automatically)...")
335
+ from .auth import get_token
336
+ token = get_token()
337
+ if not token:
338
+ print("❌ Failed to authenticate for table discovery")
339
+ return []
335
340
 
336
341
  url = f"abfss://{self.workspace}@{self.storage_account}.dfs.fabric.microsoft.com/"
337
342
  store = AzureStore.from_url(url, bearer_token=token)
@@ -579,19 +584,22 @@ class Duckrun:
579
584
  List of lakehouse names
580
585
  """
581
586
  try:
582
- # Try to get token from notebook environment first
587
+ # Get authentication token using enhanced auth system
588
+ from .auth import get_fabric_api_token
589
+ token = get_fabric_api_token()
590
+ if not token:
591
+ print("❌ Failed to authenticate for listing lakehouses")
592
+ return []
593
+
594
+ # Try to get current workspace ID if in notebook environment
595
+ workspace_id = None
583
596
  try:
584
597
  import notebookutils # type: ignore
585
- token = notebookutils.credentials.getToken("pbi")
586
598
  workspace_id = notebookutils.runtime.context.get("workspaceId")
587
599
  except ImportError:
588
- # Fallback to azure-identity
589
- print("Getting authentication token...")
590
- from azure.identity import AzureCliCredential, InteractiveBrowserCredential, ChainedTokenCredential
591
- credential = ChainedTokenCredential(AzureCliCredential(), InteractiveBrowserCredential())
592
- token_obj = credential.get_token("https://api.fabric.microsoft.com/.default")
593
- token = token_obj.token
594
-
600
+ pass # Not in notebook environment
601
+
602
+ if not workspace_id:
595
603
  # Get workspace ID by name
596
604
  workspace_id = self._get_workspace_id_by_name(token, self.workspace)
597
605
  if not workspace_id:
@@ -626,19 +634,22 @@ class Duckrun:
626
634
  True if lakehouse exists or was created successfully, False otherwise
627
635
  """
628
636
  try:
629
- # Try to get token from notebook environment first
637
+ # Get authentication token using enhanced auth system
638
+ from .auth import get_fabric_api_token
639
+ token = get_fabric_api_token()
640
+ if not token:
641
+ print("❌ Failed to authenticate for lakehouse creation")
642
+ return False
643
+
644
+ # Try to get current workspace ID if in notebook environment
645
+ workspace_id = None
630
646
  try:
631
647
  import notebookutils # type: ignore
632
- token = notebookutils.credentials.getToken("pbi")
633
648
  workspace_id = notebookutils.runtime.context.get("workspaceId")
634
649
  except ImportError:
635
- # Fallback to azure-identity
636
- print("Getting authentication token...")
637
- from azure.identity import AzureCliCredential, InteractiveBrowserCredential, ChainedTokenCredential
638
- credential = ChainedTokenCredential(AzureCliCredential(), InteractiveBrowserCredential())
639
- token_obj = credential.get_token("https://api.fabric.microsoft.com/.default")
640
- token = token_obj.token
641
-
650
+ pass # Not in notebook environment
651
+
652
+ if not workspace_id:
642
653
  # Get workspace ID by name
643
654
  workspace_id = self._get_workspace_id_by_name(token, self.workspace)
644
655
  if not workspace_id:
@@ -718,28 +729,18 @@ class WorkspaceConnection:
718
729
  List of lakehouse names
719
730
  """
720
731
  try:
721
- # Try to get token from notebook environment first
722
- try:
723
- import notebookutils # type: ignore
724
- token = notebookutils.credentials.getToken("pbi")
725
- # Always resolve workspace name to ID, even in notebook environment
726
- workspace_id = self._get_workspace_id_by_name(token, self.workspace_name)
727
- if not workspace_id:
728
- print(f"Workspace '{self.workspace_name}' not found")
729
- return []
730
- except ImportError:
731
- # Fallback to azure-identity
732
- print("Getting authentication token...")
733
- from azure.identity import AzureCliCredential, InteractiveBrowserCredential, ChainedTokenCredential
734
- credential = ChainedTokenCredential(AzureCliCredential(), InteractiveBrowserCredential())
735
- token_obj = credential.get_token("https://api.fabric.microsoft.com/.default")
736
- token = token_obj.token
737
-
738
- # Get workspace ID by name
739
- workspace_id = self._get_workspace_id_by_name(token, self.workspace_name)
740
- if not workspace_id:
741
- print(f"Workspace '{self.workspace_name}' not found")
742
- return []
732
+ # Get authentication token using enhanced auth system
733
+ from .auth import get_fabric_api_token
734
+ token = get_fabric_api_token()
735
+ if not token:
736
+ print("❌ Failed to authenticate for listing lakehouses")
737
+ return []
738
+
739
+ # Always resolve workspace name to ID, even in notebook environment
740
+ workspace_id = self._get_workspace_id_by_name(token, self.workspace_name)
741
+ if not workspace_id:
742
+ print(f"Workspace '{self.workspace_name}' not found")
743
+ return []
743
744
 
744
745
  # List lakehouses
745
746
  url = f"https://api.fabric.microsoft.com/v1/workspaces/{workspace_id}/lakehouses"
@@ -768,28 +769,18 @@ class WorkspaceConnection:
768
769
  True if lakehouse exists or was created successfully, False otherwise
769
770
  """
770
771
  try:
771
- # Try to get token from notebook environment first
772
- try:
773
- import notebookutils # type: ignore
774
- token = notebookutils.credentials.getToken("pbi")
775
- # Always resolve workspace name to ID, even in notebook environment
776
- workspace_id = self._get_workspace_id_by_name(token, self.workspace_name)
777
- if not workspace_id:
778
- print(f"Workspace '{self.workspace_name}' not found")
779
- return False
780
- except ImportError:
781
- # Fallback to azure-identity
782
- print("Getting authentication token...")
783
- from azure.identity import AzureCliCredential, InteractiveBrowserCredential, ChainedTokenCredential
784
- credential = ChainedTokenCredential(AzureCliCredential(), InteractiveBrowserCredential())
785
- token_obj = credential.get_token("https://api.fabric.microsoft.com/.default")
786
- token = token_obj.token
787
-
788
- # Get workspace ID by name
789
- workspace_id = self._get_workspace_id_by_name(token, self.workspace_name)
790
- if not workspace_id:
791
- print(f"Workspace '{self.workspace_name}' not found")
792
- return False
772
+ # Get authentication token using enhanced auth system
773
+ from .auth import get_fabric_api_token
774
+ token = get_fabric_api_token()
775
+ if not token:
776
+ print("❌ Failed to authenticate for lakehouse creation")
777
+ return False
778
+
779
+ # Always resolve workspace name to ID, even in notebook environment
780
+ workspace_id = self._get_workspace_id_by_name(token, self.workspace_name)
781
+ if not workspace_id:
782
+ print(f"Workspace '{self.workspace_name}' not found")
783
+ return False
793
784
 
794
785
  # Check if lakehouse already exists
795
786
  url = f"https://api.fabric.microsoft.com/v1/workspaces/{workspace_id}/lakehouses"
@@ -41,15 +41,15 @@ def copy(duckrun_instance, local_folder: str, remote_folder: str,
41
41
  print(f"❌ Path is not a directory: {local_folder}")
42
42
  return False
43
43
 
44
- # Get Azure token
44
+ # Get Azure token using enhanced auth system
45
+ from .auth import get_token
45
46
  token = duckrun_instance._get_storage_token()
46
47
  if token == "PLACEHOLDER_TOKEN_TOKEN_NOT_AVAILABLE":
47
- print("Authenticating with Azure for file upload (trying CLI, will fallback to browser if needed)...")
48
- from azure.identity import AzureCliCredential, InteractiveBrowserCredential, ChainedTokenCredential
49
- credential = ChainedTokenCredential(AzureCliCredential(), InteractiveBrowserCredential())
50
- token_obj = credential.get_token("https://storage.azure.com/.default")
51
- token = token_obj.token
52
- os.environ["AZURE_STORAGE_TOKEN"] = token
48
+ print("Authenticating with Azure for file upload (detecting environment automatically)...")
49
+ token = get_token()
50
+ if not token:
51
+ print("❌ Failed to authenticate for file upload")
52
+ return False
53
53
 
54
54
  # Setup OneLake Files URL (use correct format without .Lakehouse suffix)
55
55
  files_base_url = duckrun_instance.files_base_url
@@ -150,15 +150,15 @@ def download(duckrun_instance, remote_folder: str = "", local_folder: str = "./d
150
150
  # Download only CSV files from a specific subfolder
151
151
  dr.download("daily_reports", "./reports", ['.csv'])
152
152
  """
153
- # Get Azure token
153
+ # Get Azure token using enhanced auth system
154
+ from .auth import get_token
154
155
  token = duckrun_instance._get_storage_token()
155
156
  if token == "PLACEHOLDER_TOKEN_TOKEN_NOT_AVAILABLE":
156
- print("Authenticating with Azure for file download (trying CLI, will fallback to browser if needed)...")
157
- from azure.identity import AzureCliCredential, InteractiveBrowserCredential, ChainedTokenCredential
158
- credential = ChainedTokenCredential(AzureCliCredential(), InteractiveBrowserCredential())
159
- token_obj = credential.get_token("https://storage.azure.com/.default")
160
- token = token_obj.token
161
- os.environ["AZURE_STORAGE_TOKEN"] = token
157
+ print("Authenticating with Azure for file download (detecting environment automatically)...")
158
+ token = get_token()
159
+ if not token:
160
+ print("❌ Failed to authenticate for file download")
161
+ return False
162
162
 
163
163
  # Setup OneLake Files URL (use correct format without .Lakehouse suffix)
164
164
  files_base_url = duckrun_instance.files_base_url
@@ -15,7 +15,7 @@ def _build_write_deltalake_args(path, df, mode, schema_mode=None, partition_by=N
15
15
  """
16
16
  Build arguments for write_deltalake based on requirements:
17
17
  - If schema_mode='merge': use rust engine (no row group params)
18
- - Otherwise: use pyarrow engine with row group optimization
18
+ - Otherwise: use pyarrow engine with row group optimization (if supported)
19
19
  """
20
20
  args = {
21
21
  'table_or_uri': path,
@@ -33,10 +33,17 @@ def _build_write_deltalake_args(path, df, mode, schema_mode=None, partition_by=N
33
33
  args['schema_mode'] = 'merge'
34
34
  args['engine'] = 'rust'
35
35
  else:
36
- # Use pyarrow engine with row group optimization (default)
37
- args['max_rows_per_file'] = RG
38
- args['max_rows_per_group'] = RG
39
- args['min_rows_per_group'] = RG
36
+ # Try to use pyarrow engine with row group optimization
37
+ # Check if row group parameters are supported by inspecting function signature
38
+ import inspect
39
+ sig = inspect.signature(write_deltalake)
40
+
41
+ if 'max_rows_per_file' in sig.parameters:
42
+ # Older deltalake version - use row group optimization
43
+ args['max_rows_per_file'] = RG
44
+ args['max_rows_per_group'] = RG
45
+ args['min_rows_per_group'] = RG
46
+ # For newer versions, just use default parameters
40
47
 
41
48
  return args
42
49
 
@@ -12,7 +12,7 @@ def _build_write_deltalake_args(path, df, mode, schema_mode=None, partition_by=N
12
12
  """
13
13
  Build arguments for write_deltalake based on requirements:
14
14
  - If schema_mode='merge': use rust engine (no row group params)
15
- - Otherwise: use pyarrow engine with row group optimization
15
+ - Otherwise: use pyarrow engine with row group optimization (if supported)
16
16
  """
17
17
  args = {
18
18
  'table_or_uri': path,
@@ -30,10 +30,17 @@ def _build_write_deltalake_args(path, df, mode, schema_mode=None, partition_by=N
30
30
  args['schema_mode'] = 'merge'
31
31
  args['engine'] = 'rust'
32
32
  else:
33
- # Use pyarrow engine with row group optimization (default)
34
- args['max_rows_per_file'] = RG
35
- args['max_rows_per_group'] = RG
36
- args['min_rows_per_group'] = RG
33
+ # Try to use pyarrow engine with row group optimization
34
+ # Check if row group parameters are supported by inspecting function signature
35
+ import inspect
36
+ sig = inspect.signature(write_deltalake)
37
+
38
+ if 'max_rows_per_file' in sig.parameters:
39
+ # Older deltalake version - use row group optimization
40
+ args['max_rows_per_file'] = RG
41
+ args['max_rows_per_group'] = RG
42
+ args['min_rows_per_group'] = RG
43
+ # For newer versions, just use default parameters
37
44
 
38
45
  return args
39
46
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: duckrun
3
- Version: 0.2.6
3
+ Version: 0.2.8.dev0
4
4
  Summary: Lakehouse task runner powered by DuckDB for Microsoft Fabric
5
5
  Author: mim
6
6
  License: MIT
@@ -26,7 +26,8 @@ A helper package for stuff that made my life easier when working with Fabric Pyt
26
26
 
27
27
  **Requirements:**
28
28
  - Lakehouse must have a schema (e.g., `dbo`, `sales`, `analytics`)
29
- - **Workspace and lakehouse names with spaces are now fully supported!** ✅
29
+ - **Workspace names with spaces are fully supported!** ✅
30
+
30
31
 
31
32
  **Delta Lake Version:** This package uses an older version of deltalake to maintain row size control capabilities, which is crucial for Power BI performance optimization. The newer Rust-based deltalake versions don't yet support the row group size parameters that are essential for optimal DirectLake performance.
32
33
 
@@ -47,23 +48,40 @@ pip install duckrun[local]
47
48
 
48
49
  ## Quick Start
49
50
 
51
+ ### Simple Example for New Users
52
+
53
+ ```python
54
+ import duckrun
55
+
56
+ # Connect to a workspace and manage lakehouses
57
+ con = duckrun.connect('My Workspace')
58
+ con.list_lakehouses() # See what lakehouses exist
59
+ con.create_lakehouse_if_not_exists('data') # Create if needed
60
+
61
+ # Connect to a specific lakehouse and query data
62
+ con = duckrun.connect("My Workspace/data.lakehouse/dbo")
63
+ con.sql("SELECT * FROM my_table LIMIT 10").show()
64
+ ```
65
+
66
+ ### Full Feature Overview
67
+
50
68
  ```python
51
69
  import duckrun
52
70
 
53
71
  # 1. Workspace Management (list and create lakehouses)
54
72
  ws = duckrun.connect("My Workspace")
55
73
  lakehouses = ws.list_lakehouses() # Returns list of lakehouse names
56
- ws.create_lakehouse_if_not_exists("New Lakehouse")
74
+ ws.create_lakehouse_if_not_exists("New_Lakehouse")
57
75
 
58
76
  # 2. Connect to lakehouse with a specific schema
59
- con = duckrun.connect("My Workspace/My Lakehouse.lakehouse/dbo")
77
+ con = duckrun.connect("My Workspace/MyLakehouse.lakehouse/dbo")
60
78
 
61
- # Works with workspace names containing spaces!
62
- con = duckrun.connect("Data Analytics/Sales Data.lakehouse/analytics")
79
+ # Workspace names with spaces are supported!
80
+ con = duckrun.connect("Data Analytics/SalesData.lakehouse/analytics")
63
81
 
64
82
  # Schema defaults to 'dbo' if not specified (scans all schemas)
65
83
  # ⚠️ WARNING: Scanning all schemas can be slow for large lakehouses!
66
- con = duckrun.connect("My Workspace/My Lakehouse.lakehouse")
84
+ con = duckrun.connect("My Workspace/My_Lakehouse.lakehouse")
67
85
 
68
86
  # 3. Explore data
69
87
  con.sql("SELECT * FROM my_table LIMIT 10").show()
@@ -2,6 +2,7 @@ LICENSE
2
2
  README.md
3
3
  pyproject.toml
4
4
  duckrun/__init__.py
5
+ duckrun/auth.py
5
6
  duckrun/core.py
6
7
  duckrun/files.py
7
8
  duckrun/lakehouse.py
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "duckrun"
7
- version = "0.2.6"
7
+ version = "0.2.8.dev0"
8
8
  description = "Lakehouse task runner powered by DuckDB for Microsoft Fabric"
9
9
  readme = "README.md"
10
10
  license = {text = "MIT"}
File without changes
File without changes
File without changes