duckrun 0.2.6__tar.gz → 0.2.8.dev0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: duckrun
3
- Version: 0.2.6
3
+ Version: 0.2.8.dev0
4
4
  Summary: Lakehouse task runner powered by DuckDB for Microsoft Fabric
5
5
  Author: mim
6
6
  License: MIT
@@ -26,7 +26,8 @@ A helper package for stuff that made my life easier when working with Fabric Pyt
26
26
 
27
27
  **Requirements:**
28
28
  - Lakehouse must have a schema (e.g., `dbo`, `sales`, `analytics`)
29
- - **Workspace and lakehouse names with spaces are now fully supported!** ✅
29
+ - **Workspace names with spaces are fully supported!** ✅
30
+
30
31
 
31
32
  **Delta Lake Version:** This package uses an older version of deltalake to maintain row size control capabilities, which is crucial for Power BI performance optimization. The newer Rust-based deltalake versions don't yet support the row group size parameters that are essential for optimal DirectLake performance.
32
33
 
@@ -47,23 +48,40 @@ pip install duckrun[local]
47
48
 
48
49
  ## Quick Start
49
50
 
51
+ ### Simple Example for New Users
52
+
53
+ ```python
54
+ import duckrun
55
+
56
+ # Connect to a workspace and manage lakehouses
57
+ con = duckrun.connect('My Workspace')
58
+ con.list_lakehouses() # See what lakehouses exist
59
+ con.create_lakehouse_if_not_exists('data') # Create if needed
60
+
61
+ # Connect to a specific lakehouse and query data
62
+ con = duckrun.connect("My Workspace/data.lakehouse/dbo")
63
+ con.sql("SELECT * FROM my_table LIMIT 10").show()
64
+ ```
65
+
66
+ ### Full Feature Overview
67
+
50
68
  ```python
51
69
  import duckrun
52
70
 
53
71
  # 1. Workspace Management (list and create lakehouses)
54
72
  ws = duckrun.connect("My Workspace")
55
73
  lakehouses = ws.list_lakehouses() # Returns list of lakehouse names
56
- ws.create_lakehouse_if_not_exists("New Lakehouse")
74
+ ws.create_lakehouse_if_not_exists("New_Lakehouse")
57
75
 
58
76
  # 2. Connect to lakehouse with a specific schema
59
- con = duckrun.connect("My Workspace/My Lakehouse.lakehouse/dbo")
77
+ con = duckrun.connect("My Workspace/MyLakehouse.lakehouse/dbo")
60
78
 
61
- # Works with workspace names containing spaces!
62
- con = duckrun.connect("Data Analytics/Sales Data.lakehouse/analytics")
79
+ # Workspace names with spaces are supported!
80
+ con = duckrun.connect("Data Analytics/SalesData.lakehouse/analytics")
63
81
 
64
82
  # Schema defaults to 'dbo' if not specified (scans all schemas)
65
83
  # ⚠️ WARNING: Scanning all schemas can be slow for large lakehouses!
66
- con = duckrun.connect("My Workspace/My Lakehouse.lakehouse")
84
+ con = duckrun.connect("My Workspace/My_Lakehouse.lakehouse")
67
85
 
68
86
  # 3. Explore data
69
87
  con.sql("SELECT * FROM my_table LIMIT 10").show()
@@ -6,7 +6,8 @@ A helper package for stuff that made my life easier when working with Fabric Pyt
6
6
 
7
7
  **Requirements:**
8
8
  - Lakehouse must have a schema (e.g., `dbo`, `sales`, `analytics`)
9
- - **Workspace and lakehouse names with spaces are now fully supported!** ✅
9
+ - **Workspace names with spaces are fully supported!** ✅
10
+
10
11
 
11
12
  **Delta Lake Version:** This package uses an older version of deltalake to maintain row size control capabilities, which is crucial for Power BI performance optimization. The newer Rust-based deltalake versions don't yet support the row group size parameters that are essential for optimal DirectLake performance.
12
13
 
@@ -27,23 +28,40 @@ pip install duckrun[local]
27
28
 
28
29
  ## Quick Start
29
30
 
31
+ ### Simple Example for New Users
32
+
33
+ ```python
34
+ import duckrun
35
+
36
+ # Connect to a workspace and manage lakehouses
37
+ con = duckrun.connect('My Workspace')
38
+ con.list_lakehouses() # See what lakehouses exist
39
+ con.create_lakehouse_if_not_exists('data') # Create if needed
40
+
41
+ # Connect to a specific lakehouse and query data
42
+ con = duckrun.connect("My Workspace/data.lakehouse/dbo")
43
+ con.sql("SELECT * FROM my_table LIMIT 10").show()
44
+ ```
45
+
46
+ ### Full Feature Overview
47
+
30
48
  ```python
31
49
  import duckrun
32
50
 
33
51
  # 1. Workspace Management (list and create lakehouses)
34
52
  ws = duckrun.connect("My Workspace")
35
53
  lakehouses = ws.list_lakehouses() # Returns list of lakehouse names
36
- ws.create_lakehouse_if_not_exists("New Lakehouse")
54
+ ws.create_lakehouse_if_not_exists("New_Lakehouse")
37
55
 
38
56
  # 2. Connect to lakehouse with a specific schema
39
- con = duckrun.connect("My Workspace/My Lakehouse.lakehouse/dbo")
57
+ con = duckrun.connect("My Workspace/MyLakehouse.lakehouse/dbo")
40
58
 
41
- # Works with workspace names containing spaces!
42
- con = duckrun.connect("Data Analytics/Sales Data.lakehouse/analytics")
59
+ # Workspace names with spaces are supported!
60
+ con = duckrun.connect("Data Analytics/SalesData.lakehouse/analytics")
43
61
 
44
62
  # Schema defaults to 'dbo' if not specified (scans all schemas)
45
63
  # ⚠️ WARNING: Scanning all schemas can be slow for large lakehouses!
46
- con = duckrun.connect("My Workspace/My Lakehouse.lakehouse")
64
+ con = duckrun.connect("My Workspace/My_Lakehouse.lakehouse")
47
65
 
48
66
  # 3. Explore data
49
67
  con.sql("SELECT * FROM my_table LIMIT 10").show()
@@ -0,0 +1,240 @@
1
+ """
2
+ Enhanced authentication module for duckrun - supports multiple notebook environments
3
+ """
4
+ import os
5
+ from typing import Optional, Tuple
6
+
7
+
8
def get_token() -> Optional[str]:
    """
    Resolve an Azure Storage bearer token, adapting to the runtime environment.

    Order of attempts:
      1. Cached token from the AZURE_STORAGE_TOKEN environment variable.
      2. Microsoft Fabric notebook (notebookutils credential helper).
      3. Google Colab (device code flow).
      4. Any other environment (Azure CLI first, then interactive browser).

    Returns:
        Azure Storage token string, or None when every method fails.
    """
    # Fast path: a previously obtained token cached in the environment.
    cached = os.environ.get("AZURE_STORAGE_TOKEN")
    if cached and cached != "PLACEHOLDER_TOKEN_TOKEN_NOT_AVAILABLE":
        print("✅ Using existing Azure Storage token")
        return cached

    print("🔐 Starting Azure authentication...")

    # Microsoft Fabric notebooks expose notebookutils; prefer it when present.
    try:
        import notebookutils  # type: ignore
        print("📓 Microsoft Fabric notebook detected - using notebookutils")
        fabric_token = notebookutils.credentials.getToken("pbi")
        os.environ["AZURE_STORAGE_TOKEN"] = fabric_token
        print("✅ Fabric notebook authentication successful!")
        return fabric_token
    except ImportError:
        pass  # notebookutils missing -> not running inside Fabric
    except Exception as e:
        print(f"⚠️ Fabric notebook authentication failed: {e}")

    try:
        # Colab kernels have no local browser, so use the device code flow.
        try:
            import google.colab  # noqa: F401
            print("🚀 Google Colab detected - using device code flow")
            return _get_device_code_token()
        except ImportError:
            pass

        # Everything else (local shells, VS Code notebooks, ...): CLI then browser.
        print("🖥️ Local/VS Code environment detected - trying Azure CLI first, then browser fallback")
        return _get_local_token()

    except Exception as e:
        print(f"❌ Authentication failed: {e}")
        print("💡 Try refreshing and running again, or check your Azure permissions")
        return None
60
+
61
+
62
def _get_device_code_token() -> Optional[str]:
    """Authenticate via the device code flow (for headless hosts such as Colab).

    Caches the token in AZURE_STORAGE_TOKEN on success.

    Returns:
        Storage token string, or None on failure.
    """
    try:
        from azure.identity import DeviceCodeCredential

        # Reuse the well-known Azure CLI application ID so users need no
        # app registration of their own.
        credential = DeviceCodeCredential(
            client_id="04b07795-8ddb-461a-bbee-02f9e1bf7b46",  # Azure CLI client ID
            tenant_id="common"
        )

        print("🔐 Follow the authentication prompts in your browser...")
        result = credential.get_token("https://storage.azure.com/.default")

        os.environ["AZURE_STORAGE_TOKEN"] = result.token
        print("✅ Device code authentication successful!")
        return result.token

    except Exception as e:
        print(f"❌ Device code authentication failed: {e}")
        return None
83
+
84
+
85
def _get_local_token() -> Optional[str]:
    """Authenticate on a local machine: Azure CLI first, browser as fallback.

    Caches the token in AZURE_STORAGE_TOKEN on success.

    Returns:
        Storage token string, or None when both methods fail.
    """
    # Attempt 1: a logged-in Azure CLI session (no user interaction needed).
    try:
        from azure.identity import AzureCliCredential
        print("🔐 Trying Azure CLI authentication...")

        result = AzureCliCredential().get_token("https://storage.azure.com/.default")

        os.environ["AZURE_STORAGE_TOKEN"] = result.token
        print("✅ Azure CLI authentication successful!")
        return result.token

    except Exception as cli_error:
        print(f"⚠️ Azure CLI authentication failed: {cli_error}")
        print("🔐 Falling back to interactive browser authentication...")

    # Attempt 2: open a browser window for interactive sign-in.
    try:
        from azure.identity import InteractiveBrowserCredential

        result = InteractiveBrowserCredential().get_token("https://storage.azure.com/.default")

        os.environ["AZURE_STORAGE_TOKEN"] = result.token
        print("✅ Interactive browser authentication successful!")
        return result.token

    except Exception as browser_error:
        print(f"❌ Interactive browser authentication failed: {browser_error}")
        return None
117
+
118
+
119
def get_fabric_api_token() -> Optional[str]:
    """
    Obtain a token scoped to the Fabric REST API (a different scope than
    the storage token produced by get_token).

    Tries, in order: Fabric notebookutils, device code flow (Colab),
    Azure CLI, and finally an interactive browser login.

    Returns:
        Fabric API token string or None if authentication fails
    """
    print("🔐 Getting Fabric API token...")

    # Inside a Fabric notebook the runtime hands us a token directly.
    try:
        import notebookutils  # type: ignore
        print("📓 Microsoft Fabric notebook detected - using notebookutils")
        notebook_token = notebookutils.credentials.getToken("pbi")
        print("✅ Fabric API token obtained!")
        return notebook_token
    except ImportError:
        pass  # notebookutils missing -> not running inside Fabric
    except Exception as e:
        print(f"⚠️ Fabric notebook token failed: {e}")

    # Outside Fabric, fall back to azure-identity credentials.
    try:
        try:
            # Colab: no local browser, so prepare a device code credential.
            import google.colab  # noqa: F401
            print("💻 Using device code flow for Fabric API (Colab)")
            from azure.identity import DeviceCodeCredential
            credential = DeviceCodeCredential(
                client_id="04b07795-8ddb-461a-bbee-02f9e1bf7b46",
                tenant_id="common"
            )
        except ImportError:
            # All other environments: CLI first, browser credential as fallback.
            print("🖥️ Using CLI + browser fallback for Fabric API")

            try:
                from azure.identity import AzureCliCredential
                print("🔐 Trying Azure CLI for Fabric API...")
                cli_result = AzureCliCredential().get_token("https://api.fabric.microsoft.com/.default")
                print("✅ Fabric API token obtained via Azure CLI!")
                return cli_result.token
            except Exception as cli_error:
                print(f"⚠️ Azure CLI failed for Fabric API: {cli_error}")
                print("🔐 Falling back to interactive browser for Fabric API...")
                from azure.identity import InteractiveBrowserCredential
                credential = InteractiveBrowserCredential()

        api_result = credential.get_token("https://api.fabric.microsoft.com/.default")
        print("✅ Fabric API token obtained!")
        return api_result.token

    except Exception as e:
        print(f"❌ Fabric API authentication failed: {e}")
        return None
176
+
177
+
178
def authenticate_for_environment() -> Tuple[bool, Optional[str]]:
    """
    Main authentication entry point: report the detected environment, then
    delegate the actual token acquisition to get_token().

    Returns:
        Tuple of (success: bool, token: Optional[str])
    """
    print("\n🔍 Detecting execution environment...")

    # Environment detection by probing for environment-specific modules.
    try:
        import notebookutils  # type: ignore
        env_type = "Microsoft Fabric Notebook"
    except ImportError:
        try:
            import google.colab  # noqa: F401
            env_type = "Google Colab"
        except ImportError:
            # Anything else (VS Code, plain local Python, ...) is treated
            # as local; get_token() will try Azure CLI first there.
            env_type = "Local/VS Code Environment"

    print(f"📍 Environment: {env_type}")

    token = get_token()
    if token:
        print(f"✅ Authentication successful for {env_type}")
        return True, token

    print(f"❌ Authentication failed for {env_type}")
    return False, None
209
+
210
+
211
+ # For backward compatibility - expose the same interface as before
212
def get_storage_token() -> str:
    """
    Backward compatible helper: always returns a string.

    Falls back to the placeholder sentinel when no token could be obtained,
    matching the pre-existing interface expected by callers.
    """
    token = get_token()
    if token:
        return token
    return "PLACEHOLDER_TOKEN_TOKEN_NOT_AVAILABLE"
218
+
219
+
220
+ # Example usage function for testing
221
def test_authentication():
    """
    Run the full authentication flow in the current environment and print
    a human-readable report (intended for manual smoke testing).

    Returns:
        True when authentication succeeded, False otherwise.
    """
    banner = "=" * 60
    print(banner)
    print("🧪 TESTING DUCKRUN AUTHENTICATION")
    print(banner)

    success, token = authenticate_for_environment()

    if success:
        print("\n✅ Authentication test successful!")
        print(f"Token length: {len(token) if token else 0} characters")
        print(f"Token starts with: {token[:20] if token else 'None'}...")
    else:
        print("\n❌ Authentication test failed!")
        print("Please check your Azure setup and permissions.")

    print(banner)
    return success
@@ -196,18 +196,19 @@ class Duckrun:
196
196
  print(f"🔍 Resolving '{workspace_name}' workspace and '{lakehouse_name}' lakehouse to GUIDs (workspace has spaces)...")
197
197
 
198
198
  try:
199
- # Get authentication token (try notebook environment first, then azure-identity)
199
+ # Get authentication token using enhanced auth system
200
+ from .auth import get_fabric_api_token
201
+ token = get_fabric_api_token()
202
+ if not token:
203
+ raise ValueError("Failed to obtain Fabric API token")
204
+
205
+ # Try to get current workspace ID if in notebook environment
206
+ current_workspace_id = None
200
207
  try:
201
208
  import notebookutils # type: ignore
202
- token = notebookutils.credentials.getToken("pbi")
203
209
  current_workspace_id = notebookutils.runtime.context.get("workspaceId")
204
210
  except ImportError:
205
- current_workspace_id = None
206
- # Fallback to azure-identity for external environments
207
- from azure.identity import AzureCliCredential, InteractiveBrowserCredential, ChainedTokenCredential
208
- credential = ChainedTokenCredential(AzureCliCredential(), InteractiveBrowserCredential())
209
- token_obj = credential.get_token("https://api.fabric.microsoft.com/.default")
210
- token = token_obj.token
211
+ pass # Not in notebook environment
211
212
 
212
213
  # Resolve workspace name to ID
213
214
  if current_workspace_id:
@@ -302,19 +303,23 @@ class Duckrun:
302
303
  return WorkspaceConnection(workspace_name)
303
304
 
304
305
  def _get_storage_token(self):
305
- return os.environ.get("AZURE_STORAGE_TOKEN", "PLACEHOLDER_TOKEN_TOKEN_NOT_AVAILABLE")
306
+ from .auth import get_storage_token
307
+ return get_storage_token()
306
308
 
307
309
  def _create_onelake_secret(self):
308
310
  token = self._get_storage_token()
309
311
  if token != "PLACEHOLDER_TOKEN_TOKEN_NOT_AVAILABLE":
310
312
  self.con.sql(f"CREATE OR REPLACE SECRET onelake (TYPE AZURE, PROVIDER ACCESS_TOKEN, ACCESS_TOKEN '{token}')")
311
313
  else:
312
- print("Authenticating with Azure (trying CLI, will fallback to browser if needed)...")
313
- from azure.identity import AzureCliCredential, InteractiveBrowserCredential, ChainedTokenCredential
314
- credential = ChainedTokenCredential(AzureCliCredential(), InteractiveBrowserCredential())
315
- token = credential.get_token("https://storage.azure.com/.default")
316
- os.environ["AZURE_STORAGE_TOKEN"] = token.token
317
- self.con.sql("CREATE OR REPLACE PERSISTENT SECRET onelake (TYPE azure, PROVIDER credential_chain, CHAIN 'cli', ACCOUNT_NAME 'onelake')")
314
+ # Enhanced authentication - try all methods
315
+ from .auth import get_token
316
+ token = get_token()
317
+ if token:
318
+ os.environ["AZURE_STORAGE_TOKEN"] = token
319
+ self.con.sql(f"CREATE OR REPLACE SECRET onelake (TYPE AZURE, PROVIDER ACCESS_TOKEN, ACCESS_TOKEN '{token}')")
320
+ else:
321
+ # Final fallback to persistent secret
322
+ self.con.sql("CREATE OR REPLACE PERSISTENT SECRET onelake (TYPE azure, PROVIDER credential_chain, CHAIN 'cli', ACCOUNT_NAME 'onelake')")
318
323
 
319
324
  def _discover_tables_fast(self) -> List[Tuple[str, str]]:
320
325
  """
@@ -326,12 +331,12 @@ class Duckrun:
326
331
  """
327
332
  token = self._get_storage_token()
328
333
  if token == "PLACEHOLDER_TOKEN_TOKEN_NOT_AVAILABLE":
329
- print("Authenticating with Azure for table discovery (trying CLI, will fallback to browser if needed)...")
330
- from azure.identity import AzureCliCredential, InteractiveBrowserCredential, ChainedTokenCredential
331
- credential = ChainedTokenCredential(AzureCliCredential(), InteractiveBrowserCredential())
332
- token_obj = credential.get_token("https://storage.azure.com/.default")
333
- token = token_obj.token
334
- os.environ["AZURE_STORAGE_TOKEN"] = token
334
+ print("Authenticating with Azure for table discovery (detecting environment automatically)...")
335
+ from .auth import get_token
336
+ token = get_token()
337
+ if not token:
338
+ print("❌ Failed to authenticate for table discovery")
339
+ return []
335
340
 
336
341
  url = f"abfss://{self.workspace}@{self.storage_account}.dfs.fabric.microsoft.com/"
337
342
  store = AzureStore.from_url(url, bearer_token=token)
@@ -579,19 +584,22 @@ class Duckrun:
579
584
  List of lakehouse names
580
585
  """
581
586
  try:
582
- # Try to get token from notebook environment first
587
+ # Get authentication token using enhanced auth system
588
+ from .auth import get_fabric_api_token
589
+ token = get_fabric_api_token()
590
+ if not token:
591
+ print("❌ Failed to authenticate for listing lakehouses")
592
+ return []
593
+
594
+ # Try to get current workspace ID if in notebook environment
595
+ workspace_id = None
583
596
  try:
584
597
  import notebookutils # type: ignore
585
- token = notebookutils.credentials.getToken("pbi")
586
598
  workspace_id = notebookutils.runtime.context.get("workspaceId")
587
599
  except ImportError:
588
- # Fallback to azure-identity
589
- print("Getting authentication token...")
590
- from azure.identity import AzureCliCredential, InteractiveBrowserCredential, ChainedTokenCredential
591
- credential = ChainedTokenCredential(AzureCliCredential(), InteractiveBrowserCredential())
592
- token_obj = credential.get_token("https://api.fabric.microsoft.com/.default")
593
- token = token_obj.token
594
-
600
+ pass # Not in notebook environment
601
+
602
+ if not workspace_id:
595
603
  # Get workspace ID by name
596
604
  workspace_id = self._get_workspace_id_by_name(token, self.workspace)
597
605
  if not workspace_id:
@@ -626,19 +634,22 @@ class Duckrun:
626
634
  True if lakehouse exists or was created successfully, False otherwise
627
635
  """
628
636
  try:
629
- # Try to get token from notebook environment first
637
+ # Get authentication token using enhanced auth system
638
+ from .auth import get_fabric_api_token
639
+ token = get_fabric_api_token()
640
+ if not token:
641
+ print("❌ Failed to authenticate for lakehouse creation")
642
+ return False
643
+
644
+ # Try to get current workspace ID if in notebook environment
645
+ workspace_id = None
630
646
  try:
631
647
  import notebookutils # type: ignore
632
- token = notebookutils.credentials.getToken("pbi")
633
648
  workspace_id = notebookutils.runtime.context.get("workspaceId")
634
649
  except ImportError:
635
- # Fallback to azure-identity
636
- print("Getting authentication token...")
637
- from azure.identity import AzureCliCredential, InteractiveBrowserCredential, ChainedTokenCredential
638
- credential = ChainedTokenCredential(AzureCliCredential(), InteractiveBrowserCredential())
639
- token_obj = credential.get_token("https://api.fabric.microsoft.com/.default")
640
- token = token_obj.token
641
-
650
+ pass # Not in notebook environment
651
+
652
+ if not workspace_id:
642
653
  # Get workspace ID by name
643
654
  workspace_id = self._get_workspace_id_by_name(token, self.workspace)
644
655
  if not workspace_id:
@@ -718,28 +729,18 @@ class WorkspaceConnection:
718
729
  List of lakehouse names
719
730
  """
720
731
  try:
721
- # Try to get token from notebook environment first
722
- try:
723
- import notebookutils # type: ignore
724
- token = notebookutils.credentials.getToken("pbi")
725
- # Always resolve workspace name to ID, even in notebook environment
726
- workspace_id = self._get_workspace_id_by_name(token, self.workspace_name)
727
- if not workspace_id:
728
- print(f"Workspace '{self.workspace_name}' not found")
729
- return []
730
- except ImportError:
731
- # Fallback to azure-identity
732
- print("Getting authentication token...")
733
- from azure.identity import AzureCliCredential, InteractiveBrowserCredential, ChainedTokenCredential
734
- credential = ChainedTokenCredential(AzureCliCredential(), InteractiveBrowserCredential())
735
- token_obj = credential.get_token("https://api.fabric.microsoft.com/.default")
736
- token = token_obj.token
737
-
738
- # Get workspace ID by name
739
- workspace_id = self._get_workspace_id_by_name(token, self.workspace_name)
740
- if not workspace_id:
741
- print(f"Workspace '{self.workspace_name}' not found")
742
- return []
732
+ # Get authentication token using enhanced auth system
733
+ from .auth import get_fabric_api_token
734
+ token = get_fabric_api_token()
735
+ if not token:
736
+ print("❌ Failed to authenticate for listing lakehouses")
737
+ return []
738
+
739
+ # Always resolve workspace name to ID, even in notebook environment
740
+ workspace_id = self._get_workspace_id_by_name(token, self.workspace_name)
741
+ if not workspace_id:
742
+ print(f"Workspace '{self.workspace_name}' not found")
743
+ return []
743
744
 
744
745
  # List lakehouses
745
746
  url = f"https://api.fabric.microsoft.com/v1/workspaces/{workspace_id}/lakehouses"
@@ -768,28 +769,18 @@ class WorkspaceConnection:
768
769
  True if lakehouse exists or was created successfully, False otherwise
769
770
  """
770
771
  try:
771
- # Try to get token from notebook environment first
772
- try:
773
- import notebookutils # type: ignore
774
- token = notebookutils.credentials.getToken("pbi")
775
- # Always resolve workspace name to ID, even in notebook environment
776
- workspace_id = self._get_workspace_id_by_name(token, self.workspace_name)
777
- if not workspace_id:
778
- print(f"Workspace '{self.workspace_name}' not found")
779
- return False
780
- except ImportError:
781
- # Fallback to azure-identity
782
- print("Getting authentication token...")
783
- from azure.identity import AzureCliCredential, InteractiveBrowserCredential, ChainedTokenCredential
784
- credential = ChainedTokenCredential(AzureCliCredential(), InteractiveBrowserCredential())
785
- token_obj = credential.get_token("https://api.fabric.microsoft.com/.default")
786
- token = token_obj.token
787
-
788
- # Get workspace ID by name
789
- workspace_id = self._get_workspace_id_by_name(token, self.workspace_name)
790
- if not workspace_id:
791
- print(f"Workspace '{self.workspace_name}' not found")
792
- return False
772
+ # Get authentication token using enhanced auth system
773
+ from .auth import get_fabric_api_token
774
+ token = get_fabric_api_token()
775
+ if not token:
776
+ print("❌ Failed to authenticate for lakehouse creation")
777
+ return False
778
+
779
+ # Always resolve workspace name to ID, even in notebook environment
780
+ workspace_id = self._get_workspace_id_by_name(token, self.workspace_name)
781
+ if not workspace_id:
782
+ print(f"Workspace '{self.workspace_name}' not found")
783
+ return False
793
784
 
794
785
  # Check if lakehouse already exists
795
786
  url = f"https://api.fabric.microsoft.com/v1/workspaces/{workspace_id}/lakehouses"
@@ -41,15 +41,15 @@ def copy(duckrun_instance, local_folder: str, remote_folder: str,
41
41
  print(f"❌ Path is not a directory: {local_folder}")
42
42
  return False
43
43
 
44
- # Get Azure token
44
+ # Get Azure token using enhanced auth system
45
+ from .auth import get_token
45
46
  token = duckrun_instance._get_storage_token()
46
47
  if token == "PLACEHOLDER_TOKEN_TOKEN_NOT_AVAILABLE":
47
- print("Authenticating with Azure for file upload (trying CLI, will fallback to browser if needed)...")
48
- from azure.identity import AzureCliCredential, InteractiveBrowserCredential, ChainedTokenCredential
49
- credential = ChainedTokenCredential(AzureCliCredential(), InteractiveBrowserCredential())
50
- token_obj = credential.get_token("https://storage.azure.com/.default")
51
- token = token_obj.token
52
- os.environ["AZURE_STORAGE_TOKEN"] = token
48
+ print("Authenticating with Azure for file upload (detecting environment automatically)...")
49
+ token = get_token()
50
+ if not token:
51
+ print("❌ Failed to authenticate for file upload")
52
+ return False
53
53
 
54
54
  # Setup OneLake Files URL (use correct format without .Lakehouse suffix)
55
55
  files_base_url = duckrun_instance.files_base_url
@@ -150,15 +150,15 @@ def download(duckrun_instance, remote_folder: str = "", local_folder: str = "./d
150
150
  # Download only CSV files from a specific subfolder
151
151
  dr.download("daily_reports", "./reports", ['.csv'])
152
152
  """
153
- # Get Azure token
153
+ # Get Azure token using enhanced auth system
154
+ from .auth import get_token
154
155
  token = duckrun_instance._get_storage_token()
155
156
  if token == "PLACEHOLDER_TOKEN_TOKEN_NOT_AVAILABLE":
156
- print("Authenticating with Azure for file download (trying CLI, will fallback to browser if needed)...")
157
- from azure.identity import AzureCliCredential, InteractiveBrowserCredential, ChainedTokenCredential
158
- credential = ChainedTokenCredential(AzureCliCredential(), InteractiveBrowserCredential())
159
- token_obj = credential.get_token("https://storage.azure.com/.default")
160
- token = token_obj.token
161
- os.environ["AZURE_STORAGE_TOKEN"] = token
157
+ print("Authenticating with Azure for file download (detecting environment automatically)...")
158
+ token = get_token()
159
+ if not token:
160
+ print("❌ Failed to authenticate for file download")
161
+ return False
162
162
 
163
163
  # Setup OneLake Files URL (use correct format without .Lakehouse suffix)
164
164
  files_base_url = duckrun_instance.files_base_url
@@ -15,7 +15,7 @@ def _build_write_deltalake_args(path, df, mode, schema_mode=None, partition_by=N
15
15
  """
16
16
  Build arguments for write_deltalake based on requirements:
17
17
  - If schema_mode='merge': use rust engine (no row group params)
18
- - Otherwise: use pyarrow engine with row group optimization
18
+ - Otherwise: use pyarrow engine with row group optimization (if supported)
19
19
  """
20
20
  args = {
21
21
  'table_or_uri': path,
@@ -33,10 +33,17 @@ def _build_write_deltalake_args(path, df, mode, schema_mode=None, partition_by=N
33
33
  args['schema_mode'] = 'merge'
34
34
  args['engine'] = 'rust'
35
35
  else:
36
- # Use pyarrow engine with row group optimization (default)
37
- args['max_rows_per_file'] = RG
38
- args['max_rows_per_group'] = RG
39
- args['min_rows_per_group'] = RG
36
+ # Try to use pyarrow engine with row group optimization
37
+ # Check if row group parameters are supported by inspecting function signature
38
+ import inspect
39
+ sig = inspect.signature(write_deltalake)
40
+
41
+ if 'max_rows_per_file' in sig.parameters:
42
+ # Older deltalake version - use row group optimization
43
+ args['max_rows_per_file'] = RG
44
+ args['max_rows_per_group'] = RG
45
+ args['min_rows_per_group'] = RG
46
+ # For newer versions, just use default parameters
40
47
 
41
48
  return args
42
49
 
@@ -12,7 +12,7 @@ def _build_write_deltalake_args(path, df, mode, schema_mode=None, partition_by=N
12
12
  """
13
13
  Build arguments for write_deltalake based on requirements:
14
14
  - If schema_mode='merge': use rust engine (no row group params)
15
- - Otherwise: use pyarrow engine with row group optimization
15
+ - Otherwise: use pyarrow engine with row group optimization (if supported)
16
16
  """
17
17
  args = {
18
18
  'table_or_uri': path,
@@ -30,10 +30,17 @@ def _build_write_deltalake_args(path, df, mode, schema_mode=None, partition_by=N
30
30
  args['schema_mode'] = 'merge'
31
31
  args['engine'] = 'rust'
32
32
  else:
33
- # Use pyarrow engine with row group optimization (default)
34
- args['max_rows_per_file'] = RG
35
- args['max_rows_per_group'] = RG
36
- args['min_rows_per_group'] = RG
33
+ # Try to use pyarrow engine with row group optimization
34
+ # Check if row group parameters are supported by inspecting function signature
35
+ import inspect
36
+ sig = inspect.signature(write_deltalake)
37
+
38
+ if 'max_rows_per_file' in sig.parameters:
39
+ # Older deltalake version - use row group optimization
40
+ args['max_rows_per_file'] = RG
41
+ args['max_rows_per_group'] = RG
42
+ args['min_rows_per_group'] = RG
43
+ # For newer versions, just use default parameters
37
44
 
38
45
  return args
39
46
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: duckrun
3
- Version: 0.2.6
3
+ Version: 0.2.8.dev0
4
4
  Summary: Lakehouse task runner powered by DuckDB for Microsoft Fabric
5
5
  Author: mim
6
6
  License: MIT
@@ -26,7 +26,8 @@ A helper package for stuff that made my life easier when working with Fabric Pyt
26
26
 
27
27
  **Requirements:**
28
28
  - Lakehouse must have a schema (e.g., `dbo`, `sales`, `analytics`)
29
- - **Workspace and lakehouse names with spaces are now fully supported!** ✅
29
+ - **Workspace names with spaces are fully supported!** ✅
30
+
30
31
 
31
32
  **Delta Lake Version:** This package uses an older version of deltalake to maintain row size control capabilities, which is crucial for Power BI performance optimization. The newer Rust-based deltalake versions don't yet support the row group size parameters that are essential for optimal DirectLake performance.
32
33
 
@@ -47,23 +48,40 @@ pip install duckrun[local]
47
48
 
48
49
  ## Quick Start
49
50
 
51
+ ### Simple Example for New Users
52
+
53
+ ```python
54
+ import duckrun
55
+
56
+ # Connect to a workspace and manage lakehouses
57
+ con = duckrun.connect('My Workspace')
58
+ con.list_lakehouses() # See what lakehouses exist
59
+ con.create_lakehouse_if_not_exists('data') # Create if needed
60
+
61
+ # Connect to a specific lakehouse and query data
62
+ con = duckrun.connect("My Workspace/data.lakehouse/dbo")
63
+ con.sql("SELECT * FROM my_table LIMIT 10").show()
64
+ ```
65
+
66
+ ### Full Feature Overview
67
+
50
68
  ```python
51
69
  import duckrun
52
70
 
53
71
  # 1. Workspace Management (list and create lakehouses)
54
72
  ws = duckrun.connect("My Workspace")
55
73
  lakehouses = ws.list_lakehouses() # Returns list of lakehouse names
56
- ws.create_lakehouse_if_not_exists("New Lakehouse")
74
+ ws.create_lakehouse_if_not_exists("New_Lakehouse")
57
75
 
58
76
  # 2. Connect to lakehouse with a specific schema
59
- con = duckrun.connect("My Workspace/My Lakehouse.lakehouse/dbo")
77
+ con = duckrun.connect("My Workspace/MyLakehouse.lakehouse/dbo")
60
78
 
61
- # Works with workspace names containing spaces!
62
- con = duckrun.connect("Data Analytics/Sales Data.lakehouse/analytics")
79
+ # Workspace names with spaces are supported!
80
+ con = duckrun.connect("Data Analytics/SalesData.lakehouse/analytics")
63
81
 
64
82
  # Schema defaults to 'dbo' if not specified (scans all schemas)
65
83
  # ⚠️ WARNING: Scanning all schemas can be slow for large lakehouses!
66
- con = duckrun.connect("My Workspace/My Lakehouse.lakehouse")
84
+ con = duckrun.connect("My Workspace/My_Lakehouse.lakehouse")
67
85
 
68
86
  # 3. Explore data
69
87
  con.sql("SELECT * FROM my_table LIMIT 10").show()
@@ -2,6 +2,7 @@ LICENSE
2
2
  README.md
3
3
  pyproject.toml
4
4
  duckrun/__init__.py
5
+ duckrun/auth.py
5
6
  duckrun/core.py
6
7
  duckrun/files.py
7
8
  duckrun/lakehouse.py
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "duckrun"
7
- version = "0.2.6"
7
+ version = "0.2.8.dev0"
8
8
  description = "Lakehouse task runner powered by DuckDB for Microsoft Fabric"
9
9
  readme = "README.md"
10
10
  license = {text = "MIT"}
File without changes
File without changes
File without changes