PyPI - duckrun - Versions diffs - 0.2.14.dev1__py3-none-any.whl → 0.2.14.dev3__py3-none-any.whl - Mend

duckrun 0.2.14.dev1-py3-none-any.whl → 0.2.14.dev3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of duckrun might be problematic. Click here for more details.

Files changed (10)

duckrun/__init__.py +1 -1
duckrun/auth.py +12 -0
duckrun/core.py +379 -121
duckrun/runner.py +14 -6
{duckrun-0.2.14.dev1.dist-info → duckrun-0.2.14.dev3.dist-info}/METADATA +2 -2
duckrun-0.2.14.dev3.dist-info/RECORD +14 -0
duckrun-0.2.14.dev1.dist-info/RECORD +0 -14
{duckrun-0.2.14.dev1.dist-info → duckrun-0.2.14.dev3.dist-info}/WHEEL +0 -0
{duckrun-0.2.14.dev1.dist-info → duckrun-0.2.14.dev3.dist-info}/licenses/LICENSE +0 -0
{duckrun-0.2.14.dev1.dist-info → duckrun-0.2.14.dev3.dist-info}/top_level.txt +0 -0

duckrun/__init__.py CHANGED Viewed

@@ -2,7 +2,7 @@
 from duckrun.core import Duckrun
-__version__ = "0.2.14.dev1"
+__version__ = "0.2.14.dev2"
 # Expose unified connect method at module level
 connect = Duckrun.connect

duckrun/auth.py CHANGED Viewed

@@ -2,9 +2,21 @@
 Enhanced authentication module for duckrun - supports multiple notebook environments
 """
 import os
+import sys
 from typing import Optional, Tuple
+def safe_print(message: str):
+    """Print message with safe encoding handling for Windows"""
+    try:
+        print(message)
+    except UnicodeEncodeError:
+        # Fallback: remove emojis and special chars
+        import re
+        clean_message = re.sub(r'[^\x00-\x7F]+', '', message)
+        print(clean_message)
 def get_token() -> Optional[str]:
     """
     Smart authentication that works across multiple environments:

duckrun/core.py CHANGED Viewed

@@ -4,11 +4,8 @@ import os
 import importlib.util
 import json
 import time
-from deltalake import DeltaTable, write_deltalake
 from typing import List, Tuple, Union, Optional, Callable, Dict, Any
 from string import Template
-import obstore as obs
-from obstore.store import AzureStore
 from datetime import datetime
 from .stats import get_stats as _get_stats
 from .runner import run as _run
@@ -17,7 +14,8 @@ from .writer import QueryResult
 class Duckrun:
     """
-    Lakehouse task runner with clean tuple-based API.
+    OneLake task runner with clean tuple-based API.
+    Supports lakehouses, warehouses, databases, and other OneLake items.
     Powered by DuckDB for fast data processing.
     Task formats:
@@ -30,6 +28,10 @@ class Duckrun:
         dr = Duckrun.connect("workspace/lakehouse.lakehouse")  # defaults to dbo schema, lists all tables
         dr.run(pipeline)
+        # For other OneLake items:
+        dr = Duckrun.connect("SNOWFLAKE/ONELAKEUSEAST.SnowflakeDatabase")
+        dr = Duckrun.connect("workspace/warehouse.Warehouse")
         # For data exploration with Spark-style API:
         dr = Duckrun.connect("workspace/lakehouse.lakehouse")
         dr.sql("SELECT * FROM table").show()
@@ -65,24 +67,53 @@ class Duckrun:
         self.storage_account = storage_account
         self.token_only = token_only
-        # Construct proper ABFSS URLs
+        # Store both full name (with .ItemType) and display name (without .ItemType) for backward compatibility
+        # lakehouse_id: Full name with suffix for API calls (e.g., "data.Lakehouse")
+        # lakehouse_display_name: Name only without suffix for user code/templates (e.g., "data")
+        self.lakehouse_id = lakehouse_id
+        # Extract display name (remove .ItemType suffix if present)
         import re
+        # Check if lakehouse_id has .ItemType suffix
+        if not re.match(r'^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$', lakehouse_id, re.IGNORECASE):
+            # Friendly name - extract base name without suffix
+            for suffix in ['.Lakehouse', '.Warehouse', '.Database', '.SnowflakeDatabase']:
+                if lakehouse_id.endswith(suffix):
+                    self.lakehouse_display_name = lakehouse_id[:-len(suffix)]
+                    break
+            else:
+                self.lakehouse_display_name = lakehouse_id
+        else:
+            # GUID - use as is
+            self.lakehouse_display_name = lakehouse_id
+        # Construct proper ABFSS URLs
+        # Format: abfss://{workspace}@{storage_account}.dfs.fabric.microsoft.com/{item}/Tables/
+        # where {workspace} and {item} can be:
+        #   - Names with .lakehouse suffix (lakehouse optimization when no spaces in workspace)
+        #   - GUIDs (when resolved via API for non-lakehouse items or items with spaces)
         guid_pattern = re.compile(r'^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$', re.IGNORECASE)
-        # If lakehouse_id is a GUID, use as-is
+        # Determine the item URL part for ABFSS
         if guid_pattern.match(lakehouse_id):
-            lakehouse_url_part = lakehouse_id
+            # Already a GUID - use as-is (from API resolution)
+            item_url_part = lakehouse_id
         else:
-            # If workspace name has no spaces, always append .lakehouse unless already present
-            if " " not in workspace_id and not lakehouse_id.endswith('.lakehouse'):
-                lakehouse_url_part = f'{lakehouse_id}.lakehouse'
-            else:
-                lakehouse_url_part = lakehouse_id
-        self.table_base_url = f'abfss://{workspace_id}@{storage_account}.dfs.fabric.microsoft.com/{lakehouse_url_part}/Tables/'
-        self.files_base_url = f'abfss://{workspace_id}@{storage_account}.dfs.fabric.microsoft.com/{lakehouse_url_part}/Files/'
+            # Friendly name - use as-is (already includes .ItemType suffix from connect())
+            item_url_part = lakehouse_id
+        self.table_base_url = f'abfss://{workspace_id}@{storage_account}.dfs.fabric.microsoft.com/{item_url_part}/Tables/'
+        self.files_base_url = f'abfss://{workspace_id}@{storage_account}.dfs.fabric.microsoft.com/{item_url_part}/Files/'
         # Keep legacy properties for backward compatibility
         self.workspace = workspace_id
-        self.lakehouse_name = lakehouse_id
+        self.lakehouse_name = self.lakehouse_display_name  # Use display name (without suffix) for backward compatibility
+        # Store display name without suffix for backward compatibility with user Python functions
+        # Extract base name by removing .ItemType suffix if present
+        import re
+        suffix_pattern = re.compile(r'\.(Lakehouse|Warehouse|Database|SnowflakeDatabase)$', re.IGNORECASE)
+        self.lakehouse_display_name = suffix_pattern.sub('', lakehouse_id)
         self.con = duckdb.connect()
         self.con.sql("SET preserve_insertion_order = false")
@@ -109,12 +140,15 @@ class Duckrun:
                 compaction_threshold: int = 100, storage_account: str = "onelake",
                 token_only: bool = False):
         """
-        Create and connect to lakehouse or workspace.
+        Create and connect to OneLake items (lakehouse, warehouse, database, etc.) or workspace.
         Smart detection based on connection string format:
         - "workspace" → workspace management only
-        - "ws/lh.lakehouse/schema" → full lakehouse connection
-        - "ws/lh.lakehouse" → lakehouse connection (defaults to dbo schema)
+        - "ws/item.lakehouse/schema" → lakehouse connection with specific schema
+        - "ws/item.lakehouse" → lakehouse connection (defaults to dbo schema)
+        - "ws/item.warehouse" → warehouse connection
+        - "ws/item.database" → database connection
+        - "ws/item.snowflakedatabase" → Snowflake database connection
         Args:
             connection_string: OneLake path or workspace name
@@ -129,19 +163,26 @@ class Duckrun:
             ws.list_lakehouses()
             ws.create_lakehouse_if_not_exists("New Lakehouse")
-            # Full lakehouse connections (supports spaces in names)
+            # Lakehouse connections (supports spaces in names)
             dr = Duckrun.connect("My Workspace/My Lakehouse.lakehouse/schema", sql_folder="./sql")
             dr = Duckrun.connect("Data Workspace/Sales Data.lakehouse/analytics")  # spaces supported
             dr = Duckrun.connect("My Workspace/My Lakehouse.lakehouse")  # defaults to dbo schema
             dr = Duckrun.connect("workspace/lakehouse.lakehouse", storage_account="xxx-onelake")  # custom storage
+            # Warehouse and database connections (always uses API to resolve GUIDs)
+            dr = Duckrun.connect("SNOWFLAKE/ONELAKEUSEAST.SnowflakeDatabase")
+            dr = Duckrun.connect("My Workspace/My Warehouse.Warehouse")
+            dr = Duckrun.connect("workspace/database.Database")
             # Fast connection without table listing (token only)
             dr = Duckrun.connect("workspace/lakehouse.lakehouse", token_only=True)
         Note:
-            Internally resolves friendly names (with spaces) to GUIDs and constructs proper ABFSS URLs:
-            "My Workspace/My Lakehouse.lakehouse/schema" becomes
-            "abfss://workspace_guid@onelake.dfs.fabric.microsoft.com/lakehouse_guid/Tables/schema"
+            - Lakehouse items without spaces in workspace name use optimization (no API calls)
+            - Non-lakehouse items always resolve to GUIDs via Fabric API
+            - Internally constructs proper ABFSS URLs:
+              "My Workspace/My Item.lakehouse/schema" →
+              "abfss://workspace_guid@onelake.dfs.fabric.microsoft.com/item_guid/Tables/schema"
         """
         # Check if it's a workspace-only connection (no "/" means workspace name only)
@@ -150,70 +191,94 @@ class Duckrun:
         scan_all_schemas = False
-        # Parse lakehouse connection string: "ws/lh.lakehouse/schema" or "ws/lh.lakehouse"
-        # Support workspace and lakehouse names with spaces
+        # Parse connection string: "ws/item_name.item_type/schema" or "ws/item_name.item_type"
+        # Support workspace and item names with spaces
+        # Item types: .lakehouse, .Lakehouse, .warehouse, .Warehouse, .database, .Database, .snowflakedatabase, .SnowflakeDatabase
         parts = connection_string.split("/")
         if len(parts) == 2:
-            workspace_name, lakehouse_name = parts
+            workspace_name, item_name_with_type = parts
             scan_all_schemas = True
             schema = "dbo"
         elif len(parts) == 3:
-            workspace_name, lakehouse_name, schema = parts
+            workspace_name, item_name_with_type, schema = parts
         else:
             raise ValueError(
                 f"Invalid connection string format: '{connection_string}'. "
                 "Expected formats:\n"
                 "  'workspace name' (workspace management only)\n"
-                "  'workspace name/lakehouse name.lakehouse' (lakehouse with dbo schema)\n"
-                "  'workspace name/lakehouse name.lakehouse/schema' (lakehouse with specific schema)"
+                "  'workspace name/item name.item_type' (item with dbo schema)\n"
+                "  'workspace name/item name.item_type/schema' (item with specific schema)\n"
+                "Supported item types: .lakehouse, .warehouse, .database, .snowflakedatabase (case-insensitive)"
             )
-        if lakehouse_name.endswith(".lakehouse"):
-            lakehouse_name = lakehouse_name[:-10]
+        # Extract item type and name
+        item_type = None
+        item_name = item_name_with_type
+        # Check for known item types (case-insensitive)
+        item_type_map = {
+            '.lakehouse': 'Lakehouse',
+            '.warehouse': 'Warehouse',
+            '.database': 'Database',
+            '.snowflakedatabase': 'SnowflakeDatabase'
+        }
+        # Parse item type and normalize the suffix to proper case
+        item_name_normalized = item_name_with_type
+        for suffix, mapped_type in item_type_map.items():
+            if item_name_with_type.lower().endswith(suffix):
+                item_type = mapped_type
+                item_name = item_name_with_type[:-len(suffix)]
+                # Normalize to proper case: ItemName.ItemType (e.g., data.Lakehouse)
+                item_name_normalized = f"{item_name}.{mapped_type}"
+                break
-        if not workspace_name or not lakehouse_name:
+        if not workspace_name or not item_name:
             raise ValueError(
                 "Missing required parameters. Use one of these formats:\n"
                 "  connect('workspace name')  # workspace management\n"
-                "  connect('workspace name/lakehouse name.lakehouse/schema')  # full lakehouse\n"
-                "  connect('workspace name/lakehouse name.lakehouse')  # defaults to dbo"
+                "  connect('workspace name/item name.item_type/schema')  # full item connection\n"
+                "  connect('workspace name/item name.item_type')  # defaults to dbo"
             )
-        # Resolve friendly names to GUIDs and construct proper ABFSS path
-        workspace_id, lakehouse_id = cls._resolve_names_to_guids(workspace_name, lakehouse_name)
+        # Per OneLake API docs: Can use friendly names if no spaces/special characters
+        # Otherwise must resolve to GUIDs
+        # Check for spaces or special characters that would require GUID resolution
+        has_special_chars = " " in workspace_name or " " in item_name
+        if has_special_chars:
+            # Names have spaces/special chars: resolve to GUIDs via API
+            workspace_id, item_id = cls._resolve_names_to_guids(workspace_name, item_name, item_type)
+        else:
+            # No spaces/special chars: use friendly names directly (works for all item types)
+            # Use normalized name with proper case for API compatibility
+            workspace_id = workspace_name
+            item_id = item_name_normalized  # Use normalized with proper case
-        return cls(workspace_id, lakehouse_id, schema, sql_folder, compaction_threshold, scan_all_schemas, storage_account, token_only)
+        return cls(workspace_id, item_id, schema, sql_folder, compaction_threshold, scan_all_schemas, storage_account, token_only)
     @classmethod
-    def _resolve_names_to_guids(cls, workspace_name: str, lakehouse_name: str) -> tuple[str, str]:
+    def _resolve_names_to_guids(cls, workspace_name: str, item_name: str, item_type: Optional[str] = 'Lakehouse') -> tuple[str, str]:
         """
-        Resolve friendly workspace and lakehouse names to their GUIDs.
-        Optimization: If names don't contain spaces, use them directly (no API calls needed).
-        Only resolve to GUIDs when names contain spaces or are already GUIDs.
+        Resolve friendly workspace and item names to their GUIDs.
         Args:
             workspace_name: Display name of the workspace (can contain spaces)
-            lakehouse_name: Display name of the lakehouse (can contain spaces)
+            item_name: Display name of the item (can contain spaces)
+            item_type: Type of item - 'Lakehouse', 'Warehouse', 'Database', 'SnowflakeDatabase', etc.
         Returns:
-            Tuple of (workspace_id, lakehouse_id) - either resolved GUIDs or original names
+            Tuple of (workspace_id, item_id) - resolved GUIDs
         """
         # Check if names are already GUIDs first
         import re
         guid_pattern = re.compile(r'^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$', re.IGNORECASE)
-        if guid_pattern.match(workspace_name) and guid_pattern.match(lakehouse_name):
-            return workspace_name, lakehouse_name
-        # Optimization: If workspace name has no spaces, use both names directly (old behavior)
-        # Note: Lakehouse names cannot contain spaces in Microsoft Fabric, only workspace names can
-        if " " not in workspace_name:
-            return workspace_name, lakehouse_name
-        # Workspace name contains spaces - need to resolve both to GUIDs for proper ABFSS URLs
+        if guid_pattern.match(workspace_name) and guid_pattern.match(item_name):
+            return workspace_name, item_name
+        # Need to resolve to GUIDs via API
         try:
             # Get authentication token using enhanced auth system
             from .auth import get_fabric_api_token
@@ -231,8 +296,7 @@ class Duckrun:
             # Resolve workspace name to ID
             if current_workspace_id:
-                # In notebook environment, we could use current workspace ID
-                # but we should validate it matches the requested workspace name
+                # In notebook environment, validate it matches the requested workspace name
                 workspace_id = cls._resolve_workspace_id_by_name(token, workspace_name)
                 if not workspace_id:
                     # Fallback to current workspace if name resolution fails
@@ -244,21 +308,26 @@ class Duckrun:
                 if not workspace_id:
                     raise ValueError(f"Workspace '{workspace_name}' not found")
-            # Resolve lakehouse name to ID (required for ABFSS URLs with spaces)
-            lakehouse_id = cls._resolve_lakehouse_id_by_name(token, workspace_id, lakehouse_name)
-            if not lakehouse_id:
-                raise ValueError(f"Lakehouse '{lakehouse_name}' not found in workspace '{workspace_name}'")
+            # Resolve item name to ID based on item type
+            if item_type == 'Lakehouse':
+                item_id = cls._resolve_lakehouse_id_by_name(token, workspace_id, item_name)
+            else:
+                # Use generic item resolver for non-lakehouse items
+                item_id = cls._resolve_item_id_by_name(token, workspace_id, item_name, item_type)
-            return workspace_id, lakehouse_id
+            if not item_id:
+                raise ValueError(f"{item_type} '{item_name}' not found in workspace '{workspace_name}'")
+            return workspace_id, item_id
         except Exception as e:
             print(f"❌ Failed to resolve names to GUIDs: {e}")
-            print(f"❌ Cannot use friendly names with spaces '{workspace_name}'/'{lakehouse_name}' in ABFSS URLs without GUID resolution")
-            print("❌ Microsoft Fabric requires actual workspace and lakehouse GUIDs for ABFSS access when names contain spaces")
+            print(f"❌ Cannot resolve '{workspace_name}'/'{item_name}' ({item_type}) to GUIDs")
+            print("❌ Microsoft Fabric requires actual workspace and item GUIDs for ABFSS access")
             raise ValueError(
-                f"Unable to resolve workspace '{workspace_name}' and lakehouse '{lakehouse_name}' to GUIDs. "
-                f"ABFSS URLs require actual GUIDs when names contain spaces. "
-                f"Please ensure you have proper authentication and the workspace/lakehouse names are correct."
+                f"Unable to resolve workspace '{workspace_name}' and {item_type.lower()} '{item_name}' to GUIDs. "
+                f"ABFSS URLs require actual GUIDs. "
+                f"Please ensure you have proper authentication and the workspace/item names are correct."
             )
     @classmethod
@@ -300,6 +369,58 @@ class Duckrun:
             return None
         except Exception:
             return None
+    @classmethod
+    def _resolve_item_id_by_name(cls, token: str, workspace_id: str, item_name: str, item_type: str) -> Optional[str]:
+        """
+        Get item ID from display name within a workspace using generic items API.
+        Works for any item type: Warehouse, Database, SnowflakeDatabase, etc.
+        Args:
+            token: Fabric API authentication token
+            workspace_id: Workspace GUID
+            item_name: Display name of the item
+            item_type: Type of item (e.g., 'Warehouse', 'Database', 'SnowflakeDatabase')
+        Returns:
+            Item GUID if found, None otherwise
+        """
+        try:
+            import requests
+            # Use generic items API with type filter
+            url = f"https://api.fabric.microsoft.com/v1/workspaces/{workspace_id}/items"
+            headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
+            # Add type filter as query parameter
+            params = {"type": item_type}
+            print(f"   Searching for {item_type} '{item_name}' in workspace {workspace_id}")
+            print(f"   API URL: {url}?type={item_type}")
+            response = requests.get(url, headers=headers, params=params)
+            response.raise_for_status()
+            result = response.json()
+            items = result.get("value", [])
+            print(f"   Found {len(items)} items of type {item_type}")
+            if items:
+                print(f"   Available items: {[item.get('displayName') for item in items]}")
+            for item in items:
+                if item.get("displayName") == item_name:
+                    item_id = item.get("id")
+                    print(f"   Found matching item: {item_name} -> {item_id}")
+                    return item_id
+            print(f"   Item '{item_name}' not found in the list")
+            return None
+        except Exception as e:
+            print(f"   Error resolving {item_type} item: {e}")
+            if hasattr(e, 'response') and e.response is not None:
+                print(f"   Response status: {e.response.status_code}")
+                print(f"   Response body: {e.response.text}")
+            return None
     @classmethod
     def connect_workspace(cls, workspace_name: str):
@@ -341,77 +462,138 @@ class Duckrun:
     def _discover_tables_fast(self) -> List[Tuple[str, str]]:
         """
-        Fast Delta table discovery using obstore with list_with_delimiter.
-        Only lists directories, not files - super fast!
+        Fast table discovery using OneLake Delta Table API (Unity Catalog compatible).
+        Uses: https://learn.microsoft.com/en-us/fabric/onelake/table-apis/delta-table-apis-overview
         Returns:
             List of tuples: [(schema, table_name), ...]
         """
-        token = self._get_storage_token()
-        if token == "PLACEHOLDER_TOKEN_TOKEN_NOT_AVAILABLE":
-            print("Authenticating with Azure for table discovery (detecting environment automatically)...")
-            from .auth import get_token
-            token = get_token()
-            if not token:
-                print("❌ Failed to authenticate for table discovery")
-                return []
-        url = f"abfss://{self.workspace}@{self.storage_account}.dfs.fabric.microsoft.com/"
-        store = AzureStore.from_url(url, bearer_token=token)
-        # Use the same lakehouse URL part logic as in __init__ to ensure .lakehouse suffix is added when needed
-        import re
-        guid_pattern = re.compile(r'^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$', re.IGNORECASE)
-        if guid_pattern.match(self.lakehouse_id):
-            lakehouse_url_part = self.lakehouse_id
-        else:
-            # If workspace name has no spaces, always append .lakehouse unless already present
-            if " " not in self.workspace_id and not self.lakehouse_id.endswith('.lakehouse'):
-                lakehouse_url_part = f'{self.lakehouse_id}.lakehouse'
+        try:
+            # Get storage token for OneLake
+            token = self._get_storage_token()
+            if token == "PLACEHOLDER_TOKEN_TOKEN_NOT_AVAILABLE":
+                print("Authenticating with Azure for table discovery...")
+                from .auth import get_token
+                token = get_token()
+                if not token:
+                    print("❌ Failed to authenticate for table discovery")
+                    return []
+            # OneLake Delta Table API endpoint (Unity Catalog compatible)
+            base_url = "https://onelake.table.fabric.microsoft.com/delta"
+            # Determine workspace/item identifier for API
+            # Per docs: Can use friendly names (WorkspaceName/ItemName.ItemType) if no special characters
+            # Otherwise must use GUIDs (WorkspaceID/ItemID)
+            import re
+            guid_pattern = re.compile(r'^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$', re.IGNORECASE)
+            # Check if we're using GUIDs or friendly names
+            if guid_pattern.match(self.workspace_id) and guid_pattern.match(self.lakehouse_id):
+                # Using GUIDs - use them directly in API
+                workspace_identifier = self.workspace_id
+                item_identifier = self.lakehouse_id
+                catalog_name = self.lakehouse_id
             else:
-                lakehouse_url_part = self.lakehouse_id
-        base_path = f"{lakehouse_url_part}/Tables/"
-        tables_found = []
-        if self.scan_all_schemas:
-            # Discover all schemas first
-            schemas_result = obs.list_with_delimiter(store, prefix=base_path)
-            schemas = [
-                prefix.rstrip('/').split('/')[-1]
-                for prefix in schemas_result['common_prefixes']
-            ]
-            # Discover tables in each schema
-            for schema_name in schemas:
-                schema_path = f"{base_path}{schema_name}/"
-                result = obs.list_with_delimiter(store, prefix=schema_path)
+                # Using friendly names - lakehouse_id already includes .ItemType suffix
+                workspace_identifier = self.workspace_id
+                item_identifier = self.lakehouse_id
+                catalog_name = self.lakehouse_id
+            print(f"🔍 Discovering tables via OneLake Delta Table API...")
+            print(f"   Using identifier: {workspace_identifier}/{item_identifier}")
+            tables_found = []
+            if self.scan_all_schemas:
+                # First, list all schemas
+                schemas_url = f"{base_url}/{workspace_identifier}/{item_identifier}/api/2.1/unity-catalog/schemas"
+                params = {"catalog_name": catalog_name}
+                headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
-                for table_prefix in result['common_prefixes']:
-                    table_name = table_prefix.rstrip('/').split('/')[-1]
-                    # Skip non-table directories
-                    if table_name not in ('metadata', 'iceberg'):
-                        tables_found.append((schema_name, table_name))
-        else:
-            # Scan specific schema only
-            schema_path = f"{base_path}{self.schema}/"
-            result = obs.list_with_delimiter(store, prefix=schema_path)
+                schemas_response = requests.get(schemas_url, headers=headers, params=params)
+                if schemas_response.status_code == 200:
+                    schemas_result = schemas_response.json()
+                    schemas = schemas_result.get("schemas", [])
+                    schema_names = [s.get("name") for s in schemas if s.get("name")]
+                    print(f"   Found {len(schema_names)} schemas: {schema_names}")
+                    # Get tables from each schema
+                    for schema_name in schema_names:
+                        tables_url = f"{base_url}/{workspace_identifier}/{item_identifier}/api/2.1/unity-catalog/tables"
+                        tables_params = {
+                            "catalog_name": catalog_name,
+                            "schema_name": schema_name
+                        }
+                        tables_response = requests.get(tables_url, headers=headers, params=tables_params)
+                        if tables_response.status_code == 200:
+                            tables_result = tables_response.json()
+                            tables = tables_result.get("tables", [])
+                            for table in tables:
+                                table_name = table.get("name", "")
+                                if table_name:
+                                    tables_found.append((schema_name, table_name))
+                            if tables:
+                                print(f"   Schema '{schema_name}': {len(tables)} tables")
+                else:
+                    print(f"   Failed to list schemas: {schemas_response.status_code}")
+                    if schemas_response.status_code != 404:
+                        print(f"   Response: {schemas_response.text[:300]}")
+            else:
+                # Single schema mode - list tables in specific schema
+                tables_url = f"{base_url}/{workspace_identifier}/{item_identifier}/api/2.1/unity-catalog/tables"
+                params = {
+                    "catalog_name": catalog_name,
+                    "schema_name": self.schema
+                }
+                headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
+                print(f"   Listing tables in schema: {self.schema}")
+                tables_response = requests.get(tables_url, headers=headers, params=params)
+                if tables_response.status_code == 200:
+                    tables_result = tables_response.json()
+                    tables = tables_result.get("tables", [])
+                    for table in tables:
+                        table_name = table.get("name", "")
+                        if table_name:
+                            tables_found.append((self.schema, table_name))
+                    print(f"   Found {len(tables)} tables")
+                elif tables_response.status_code == 404:
+                    print(f"   Schema '{self.schema}' not found or has no tables")
+                else:
+                    print(f"   Failed to list tables: {tables_response.status_code}")
+                    print(f"   Response: {tables_response.text[:300]}")
-            for table_prefix in result['common_prefixes']:
-                table_name = table_prefix.rstrip('/').split('/')[-1]
-                if table_name not in ('metadata', 'iceberg'):
-                    tables_found.append((self.schema, table_name))
-        return tables_found
+            return tables_found
+        except Exception as e:
+            print(f"❌ Error during table discovery: {e}")
+            import traceback
+            traceback.print_exc()
+            return []
     def _attach_lakehouse(self):
         """Attach lakehouse tables as DuckDB views using fast discovery"""
+        print(f"🔌 Attaching tables from schema: {self.schema if not self.scan_all_schemas else 'all schemas'}")
         self._create_onelake_secret()
         try:
             tables = self._discover_tables_fast()
             if not tables:
+                if self.scan_all_schemas:
+                    print(f"⚠️  No tables found in any schema")
+                else:
+                    print(f"⚠️  No tables found in {self.schema} schema")
                 return
             # Collect table names for display
@@ -434,6 +616,7 @@ class Duckrun:
                         AS SELECT * FROM delta_scan('{self.table_base_url}{schema_name}/{table_name}');
                     """)
                 except Exception as e:
+                    print(f"⚠️  Failed to attach table {schema_name}.{table_name}: {e}")
                     continue
             # Print discovered tables as comma-separated list
@@ -442,6 +625,8 @@ class Duckrun:
         except Exception as e:
             print(f"❌ Error attaching lakehouse: {e}")
+            import traceback
+            traceback.print_exc()
     def _register_lookup_functions(self):
         """
@@ -587,24 +772,97 @@ class Duckrun:
         except Exception as e:
             print(f"⚠️  Warning: Could not register lookup functions: {e}")
-    def get_workspace_id(self) -> str:
+    def get_workspace_id(self, force: bool = False) -> str:
         """
         Get the workspace ID (GUID or name without spaces).
         Use this when passing workspace parameter to Python functions.
+        Args:
+            force: If True, always resolve to actual GUID via API. If False, returns stored value (default: False)
         Returns:
             Workspace ID - either a GUID or workspace name without spaces
         """
+        if not force:
+            return self.workspace_id
+        # Force resolution to GUID
+        import re
+        guid_pattern = re.compile(r'^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$', re.IGNORECASE)
+        # If already a GUID, return it
+        if guid_pattern.match(self.workspace_id):
+            return self.workspace_id
+        # Try to get from notebook context first (fastest)
+        try:
+            import notebookutils  # type: ignore
+            workspace_guid = notebookutils.runtime.context.get("workspaceId")
+            if workspace_guid:
+                return workspace_guid
+        except ImportError:
+            pass
+        # Resolve via API
+        try:
+            from .auth import get_fabric_api_token
+            token = get_fabric_api_token()
+            if token:
+                resolved_id = self._resolve_workspace_id_by_name(token, self.workspace_id)
+                if resolved_id:
+                    return resolved_id
+        except Exception:
+            pass
+        # Fallback to original value
         return self.workspace_id
-    def get_lakehouse_id(self) -> str:
+    def get_lakehouse_id(self, force: bool = False) -> str:
         """
         Get the lakehouse ID (GUID or name).
         Use this when passing lakehouse parameter to Python functions.
+        Args:
+            force: If True, always resolve to actual GUID via API. If False, returns stored value (default: False)
         Returns:
             Lakehouse ID - either a GUID or lakehouse name
         """
+        if not force:
+            return self.lakehouse_id
+        # Force resolution to GUID
+        import re
+        guid_pattern = re.compile(r'^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$', re.IGNORECASE)
+        # If already a GUID, return it
+        if guid_pattern.match(self.lakehouse_id):
+            return self.lakehouse_id
+        # Try to get from notebook context first (fastest)
+        try:
+            import notebookutils  # type: ignore
+            lakehouse_guid = notebookutils.lakehouse.get("id")
+            if lakehouse_guid:
+                return lakehouse_guid
+        except (ImportError, Exception):
+            pass
+        # Resolve via API
+        try:
+            from .auth import get_fabric_api_token
+            token = get_fabric_api_token()
+            if token:
+                # First get workspace GUID
+                workspace_guid = self.get_workspace_id(force=True)
+                # Then resolve lakehouse name to ID
+                resolved_id = self._resolve_lakehouse_id_by_name(token, workspace_guid, self.lakehouse_id)
+                if resolved_id:
+                    return resolved_id
+        except Exception:
+            pass
+        # Fallback to original value
         return self.lakehouse_id
     def run(self, pipeline: List[Tuple]) -> bool:

duckrun/runner.py CHANGED Viewed

@@ -130,9 +130,12 @@ def _run_python(duckrun_instance, name: str, args: tuple) -> Any:
     # Get original and resolved names
     original_workspace = duckrun_instance.workspace
-    original_lakehouse = duckrun_instance.lakehouse_name
+    original_lakehouse = duckrun_instance.lakehouse_display_name  # Base name without suffix (e.g., "data")
     resolved_workspace = duckrun_instance.workspace_id
-    resolved_lakehouse = duckrun_instance.lakehouse_id
+    # Always pass base lakehouse name (without .Lakehouse suffix) to user functions
+    # User functions expect just the name like "data", not "data.Lakehouse"
+    resolved_lakehouse = duckrun_instance.lakehouse_display_name
     # Substitute workspace/lakehouse names in args if they differ
     # This prevents URL encoding issues when names contain spaces
@@ -149,7 +152,7 @@ def _run_python(duckrun_instance, name: str, args: tuple) -> Any:
             else:
                 substituted_args.append(arg)
         args = tuple(substituted_args)
-        print(f"📝 Auto-substituted workspace/lakehouse names in args for URL compatibility")
+        print(f"📝 Auto-substituted workspace/lakehouse names in args")
     print(f"Running Python: {name}{args}")
     result = func(*args)
@@ -282,12 +285,17 @@ def _read_sql_file(duckrun_instance, table_name: str, params: Optional[Dict] = N
             # If GUID, use just the GUID
             content = content.replace('${lh}.Lakehouse', duckrun_instance.lakehouse_name)
         else:
-            # If not GUID, use legacy format
-            content = content.replace('${lh}.Lakehouse', f'{duckrun_instance.lakehouse_name}.Lakehouse')
+            # If not GUID, check if lakehouse_name already has .ItemType suffix
+            if duckrun_instance.lakehouse_name.endswith(('.Lakehouse', '.Warehouse', '.Database', '.SnowflakeDatabase')):
+                # Already has suffix - use as is
+                content = content.replace('${lh}.Lakehouse', duckrun_instance.lakehouse_name)
+            else:
+                # No suffix - add .Lakehouse for legacy format
+                content = content.replace('${lh}.Lakehouse', f'{duckrun_instance.lakehouse_name}.Lakehouse')
     full_params = {
         'ws': duckrun_instance.workspace,
-        'lh': duckrun_instance.lakehouse_name,
+        'lh': duckrun_instance.lakehouse_display_name,  # Use display name (without suffix) for backward compat
         'schema': duckrun_instance.schema,
         'storage_account': duckrun_instance.storage_account,
         'tables_url': duckrun_instance.table_base_url,

{duckrun-0.2.14.dev1.dist-info → duckrun-0.2.14.dev3.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: duckrun
-Version: 0.2.14.dev1
+Version: 0.2.14.dev3
 Summary: Lakehouse task runner powered by DuckDB for Microsoft Fabric
 Author: mim
 License: MIT
@@ -10,7 +10,7 @@ Project-URL: Issues, https://github.com/djouallah/duckrun/issues
 Requires-Python: >=3.9
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: duckdb>=1.2.2
+Requires-Dist: duckdb>=1.2.0
 Requires-Dist: deltalake<=0.18.2
 Requires-Dist: requests>=2.28.0
 Requires-Dist: obstore>=0.2.0

duckrun-0.2.14.dev3.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,14 @@
+duckrun/__init__.py,sha256=oPQXpJEgHpX_KgMrx_TWax9awIbr2B9z32cFuuG_p30,236
+duckrun/auth.py,sha256=EMaf-L2zeNOjbHOT97xYxfZNfWo4WrwrU1h3vBQTgEc,9624
+duckrun/core.py,sha256=_D0CnaRNQm_wW4bSP__EAPHEt_VNgf9N-VXWYSZScL8,65829
+duckrun/files.py,sha256=Fvdjg3DyHJzIVzKo8M_j-eGz4zU61lOB38Y_onbQJkI,10137
+duckrun/lakehouse.py,sha256=j--Z3zo8AOWt1GF9VzRosmmTAy6ey2D0LVubti58twU,14109
+duckrun/runner.py,sha256=JnRJoQ_Db__iXlhjTohplXR83NUJxItgyaa7AzrDxwE,14833
+duckrun/semantic_model.py,sha256=obzlN2-dbEW3JmDop-vrZGGGLi9u3ThhTbgtDjou7uY,29509
+duckrun/stats.py,sha256=oKIjZ7u5cFVT63FuOl5UqoDsOG3098woSCn-uI6i_sQ,11084
+duckrun/writer.py,sha256=svUuPCYOhrz299NgnpTKhARKjfej0PxnoND2iPDSypk,8098
+duckrun-0.2.14.dev3.dist-info/licenses/LICENSE,sha256=-DeQQwdbCbkB4507ZF3QbocysB-EIjDtaLexvqRkGZc,1083
+duckrun-0.2.14.dev3.dist-info/METADATA,sha256=tOLtAIHcEJyXk93hvvgZNC3Cx7U2Dy7iatRutBnrU3Y,20771
+duckrun-0.2.14.dev3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+duckrun-0.2.14.dev3.dist-info/top_level.txt,sha256=BknMEwebbUHrVAp3SC92ps8MPhK7XSYsaogTvi_DmEU,8
+duckrun-0.2.14.dev3.dist-info/RECORD,,

duckrun-0.2.14.dev1.dist-info/RECORD DELETED Viewed

@@ -1,14 +0,0 @@
-duckrun/__init__.py,sha256=OxPnNrxYqv_2XfiQPO27JiZDIxze4jnUE3VaqDdioAg,236
-duckrun/auth.py,sha256=dMqIzozgEQ5v7Uc3Mb_OoFZGmsAq0m-VOoYCVL7rehc,9281
-duckrun/core.py,sha256=LvxplwziTLb_18n064waoN3oWMuhpVJe_-y6GYfoBOc,53127
-duckrun/files.py,sha256=Fvdjg3DyHJzIVzKo8M_j-eGz4zU61lOB38Y_onbQJkI,10137
-duckrun/lakehouse.py,sha256=j--Z3zo8AOWt1GF9VzRosmmTAy6ey2D0LVubti58twU,14109
-duckrun/runner.py,sha256=yrDxfy1RVkb8iK9GKGmIFZHzCvcO_0GVQlbng7Vw_iM,14171
-duckrun/semantic_model.py,sha256=obzlN2-dbEW3JmDop-vrZGGGLi9u3ThhTbgtDjou7uY,29509
-duckrun/stats.py,sha256=oKIjZ7u5cFVT63FuOl5UqoDsOG3098woSCn-uI6i_sQ,11084
-duckrun/writer.py,sha256=svUuPCYOhrz299NgnpTKhARKjfej0PxnoND2iPDSypk,8098
-duckrun-0.2.14.dev1.dist-info/licenses/LICENSE,sha256=-DeQQwdbCbkB4507ZF3QbocysB-EIjDtaLexvqRkGZc,1083
-duckrun-0.2.14.dev1.dist-info/METADATA,sha256=MhpAtTMLpzOwOINN7Dgs6ih_JhjhbzxX73W_E6N30pA,20771
-duckrun-0.2.14.dev1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-duckrun-0.2.14.dev1.dist-info/top_level.txt,sha256=BknMEwebbUHrVAp3SC92ps8MPhK7XSYsaogTvi_DmEU,8
-duckrun-0.2.14.dev1.dist-info/RECORD,,

{duckrun-0.2.14.dev1.dist-info → duckrun-0.2.14.dev3.dist-info}/WHEEL RENAMED Viewed

File without changes

{duckrun-0.2.14.dev1.dist-info → duckrun-0.2.14.dev3.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{duckrun-0.2.14.dev1.dist-info → duckrun-0.2.14.dev3.dist-info}/top_level.txt RENAMED Viewed

File without changes

duckrun 0.2.14.dev1__py3-none-any.whl → 0.2.14.dev3__py3-none-any.whl

Potentially problematic release.

duckrun 0.2.14.dev1__py3-none-any.whl → 0.2.14.dev3__py3-none-any.whl