duckrun 0.2.13.dev0__tar.gz → 0.2.14.dev1__tar.gz
This diff shows the changes between publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Potentially problematic release: this version of duckrun might be problematic.
- {duckrun-0.2.13.dev0 → duckrun-0.2.14.dev1}/PKG-INFO +1 -1
- {duckrun-0.2.13.dev0 → duckrun-0.2.14.dev1}/duckrun/__init__.py +1 -1
- {duckrun-0.2.13.dev0 → duckrun-0.2.14.dev1}/duckrun/auth.py +0 -30
- {duckrun-0.2.13.dev0 → duckrun-0.2.14.dev1}/duckrun/core.py +30 -20
- {duckrun-0.2.13.dev0 → duckrun-0.2.14.dev1}/duckrun.egg-info/PKG-INFO +1 -1
- {duckrun-0.2.13.dev0 → duckrun-0.2.14.dev1}/pyproject.toml +1 -1
- {duckrun-0.2.13.dev0 → duckrun-0.2.14.dev1}/LICENSE +0 -0
- {duckrun-0.2.13.dev0 → duckrun-0.2.14.dev1}/README.md +0 -0
- {duckrun-0.2.13.dev0 → duckrun-0.2.14.dev1}/duckrun/files.py +0 -0
- {duckrun-0.2.13.dev0 → duckrun-0.2.14.dev1}/duckrun/lakehouse.py +0 -0
- {duckrun-0.2.13.dev0 → duckrun-0.2.14.dev1}/duckrun/runner.py +0 -0
- {duckrun-0.2.13.dev0 → duckrun-0.2.14.dev1}/duckrun/semantic_model.py +0 -0
- {duckrun-0.2.13.dev0 → duckrun-0.2.14.dev1}/duckrun/stats.py +0 -0
- {duckrun-0.2.13.dev0 → duckrun-0.2.14.dev1}/duckrun/writer.py +0 -0
- {duckrun-0.2.13.dev0 → duckrun-0.2.14.dev1}/duckrun.egg-info/SOURCES.txt +0 -0
- {duckrun-0.2.13.dev0 → duckrun-0.2.14.dev1}/duckrun.egg-info/dependency_links.txt +0 -0
- {duckrun-0.2.13.dev0 → duckrun-0.2.14.dev1}/duckrun.egg-info/requires.txt +0 -0
- {duckrun-0.2.13.dev0 → duckrun-0.2.14.dev1}/duckrun.egg-info/top_level.txt +0 -0
- {duckrun-0.2.13.dev0 → duckrun-0.2.14.dev1}/setup.cfg +0 -0
duckrun/auth.py

```diff
@@ -20,7 +20,6 @@ def get_token() -> Optional[str]:
     # Check if we already have a cached token
     token_env = os.environ.get("AZURE_STORAGE_TOKEN")
     if token_env and token_env != "PLACEHOLDER_TOKEN_TOKEN_NOT_AVAILABLE":
-        print("✅ Using existing Azure Storage token")
         return token_env
 
     print("🔐 Starting Azure authentication...")
```
```diff
@@ -77,35 +76,6 @@ def _get_device_code_token() -> Optional[str]:
     return None
 
 
-def _is_databricks() -> bool:
-    """Check if we're running in a Databricks environment"""
-    # Databricks sets specific environment variables
-    return (
-        os.environ.get("DATABRICKS_RUNTIME_VERSION") is not None or
-        os.environ.get("DB_HOME") is not None or
-        "databricks" in os.environ.get("SPARK_HOME", "").lower()
-    )
-
-
-def _get_databricks_token() -> Optional[str]:
-    """Get token using DefaultAzureCredential for Databricks environments"""
-    try:
-        from azure.identity import DefaultAzureCredential
-
-        # DefaultAzureCredential will automatically use Databricks managed identity
-        credential = DefaultAzureCredential()
-        token_obj = credential.get_token("https://storage.azure.com/.default")
-
-        os.environ["AZURE_STORAGE_TOKEN"] = token_obj.token
-        print("✅ Databricks authentication successful!")
-        return token_obj.token
-
-    except Exception as e:
-        print(f"❌ Databricks authentication failed: {e}")
-        print("💡 Make sure your Databricks cluster has the required Azure permissions")
-        return None
-
-
 def _get_local_token() -> Optional[str]:
     """Get token using CLI first, then browser fallback for local environments"""
     # First try Azure CLI directly
```
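The Databricks detection and token helpers are removed entirely in 0.2.14.dev1. For environments that relied on them, a possible workaround (my sketch, not part of the package) is to mint the token yourself and export it as AZURE_STORAGE_TOKEN, which `get_token()` still checks first, as the hunk above shows. This assumes azure-identity is installed and the running identity has storage permissions:

```python
import os
from azure.identity import DefaultAzureCredential

# Mirrors what the removed _get_databricks_token() did: request a
# storage-scoped token and cache it where get_token() looks first.
credential = DefaultAzureCredential()
token = credential.get_token("https://storage.azure.com/.default")
os.environ["AZURE_STORAGE_TOKEN"] = token.token
```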
duckrun/core.py

```diff
@@ -53,7 +53,8 @@ class Duckrun:
 
     def __init__(self, workspace_id: str, lakehouse_id: str, schema: str = "dbo",
                  sql_folder: Optional[str] = None, compaction_threshold: int = 10,
-                 scan_all_schemas: bool = False, storage_account: str = "onelake"):
+                 scan_all_schemas: bool = False, storage_account: str = "onelake",
+                 token_only: bool = False):
         # Store GUIDs for internal use
         self.workspace_id = workspace_id
         self.lakehouse_id = lakehouse_id
```
```diff
@@ -62,6 +63,7 @@ class Duckrun:
         self.compaction_threshold = compaction_threshold
         self.scan_all_schemas = scan_all_schemas
         self.storage_account = storage_account
+        self.token_only = token_only
 
         # Construct proper ABFSS URLs
         import re
```
```diff
@@ -93,12 +95,19 @@ class Duckrun:
         except ImportError:
             pass  # Not in Colab, use default transport
 
-
-
+        # Only attach lakehouse and register functions if not token_only mode
+        if not token_only:
+            self._attach_lakehouse()
+            self._register_lookup_functions()
+        else:
+            # In token_only mode, just create the secret for authentication
+            self._create_onelake_secret()
+            print("✓ Token authenticated (fast mode - tables not listed)")
 
     @classmethod
     def connect(cls, connection_string: str, sql_folder: Optional[str] = None,
-                compaction_threshold: int = 100, storage_account: str = "onelake"):
+                compaction_threshold: int = 100, storage_account: str = "onelake",
+                token_only: bool = False):
        """
        Create and connect to lakehouse or workspace.
 
```
```diff
@@ -112,6 +121,7 @@ class Duckrun:
            sql_folder: Optional path or URL to SQL files folder
            compaction_threshold: File count threshold for compaction
            storage_account: Storage account name (default: "onelake")
+           token_only: If True, only authenticate without listing tables (faster connection)
 
        Examples:
            # Workspace management only (supports spaces in names)
```
```diff
@@ -125,6 +135,9 @@ class Duckrun:
            dr = Duckrun.connect("My Workspace/My Lakehouse.lakehouse")  # defaults to dbo schema
            dr = Duckrun.connect("workspace/lakehouse.lakehouse", storage_account="xxx-onelake")  # custom storage
 
+           # Fast connection without table listing (token only)
+           dr = Duckrun.connect("workspace/lakehouse.lakehouse", token_only=True)
+
        Note:
            Internally resolves friendly names (with spaces) to GUIDs and constructs proper ABFSS URLs:
            "My Workspace/My Lakehouse.lakehouse/schema" becomes
```
```diff
@@ -169,7 +182,7 @@ class Duckrun:
        # Resolve friendly names to GUIDs and construct proper ABFSS path
        workspace_id, lakehouse_id = cls._resolve_names_to_guids(workspace_name, lakehouse_name)
 
-       return cls(workspace_id, lakehouse_id, schema, sql_folder, compaction_threshold, scan_all_schemas, storage_account)
+       return cls(workspace_id, lakehouse_id, schema, sql_folder, compaction_threshold, scan_all_schemas, storage_account, token_only)
 
    @classmethod
    def _resolve_names_to_guids(cls, workspace_name: str, lakehouse_name: str) -> tuple[str, str]:
```
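With `token_only` now threaded from `connect()` through `__init__` into `cls(...)`, usage follows the docstring examples above. A sketch (workspace and lakehouse names are placeholders; the import path assumes the class lives in `duckrun/core.py` as this diff shows):

```python
from duckrun.core import Duckrun

# Fast path: authenticates and creates the OneLake secret, but skips
# table discovery and view attachment.
dr = Duckrun.connect("My Workspace/My Lakehouse.lakehouse", token_only=True)

# Default path: additionally attaches every Delta table as a DuckDB view.
dr_full = Duckrun.connect("My Workspace/My Lakehouse.lakehouse")
```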
```diff
@@ -401,15 +414,8 @@ class Duckrun:
             if not tables:
                 return
 
-            #
-
-            for schema_name, table_name in tables:
-                if schema_name not in schema_tables:
-                    schema_tables[schema_name] = []
-                schema_tables[schema_name].append(table_name)
-
-            attached_count = 0
-            skipped_tables = []
+            # Collect table names for display
+            table_names = []
 
             for schema_name, table_name in tables:
                 try:
```
```diff
@@ -417,18 +423,22 @@ class Duckrun:
                         # Create proper schema.table structure in DuckDB
                         self.con.sql(f"CREATE SCHEMA IF NOT EXISTS {schema_name}")
                         view_name = f"{schema_name}.{table_name}"
+                        table_names.append(view_name)
                     else:
                         # Single schema mode - use just table name
                         view_name = table_name
+                        table_names.append(table_name)
 
                     self.con.sql(f"""
                         CREATE OR REPLACE VIEW {view_name}
                         AS SELECT * FROM delta_scan('{self.table_base_url}{schema_name}/{table_name}');
                     """)
-                    attached_count += 1
                 except Exception as e:
-                    skipped_tables.append(f"{schema_name}.{table_name}")
                     continue
+
+            # Print discovered tables as comma-separated list
+            if table_names:
+                print(", ".join(table_names))
 
         except Exception as e:
             print(f"❌ Error attaching lakehouse: {e}")
```
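The loop above is DuckDB's standard view-per-Delta-table pattern, now also collecting names for the comma-separated listing. A standalone sketch of the same mechanics, assuming Delta tables exist at the illustrative local paths (duckrun itself points `delta_scan` at ABFSS URLs):

```python
import duckdb

con = duckdb.connect()
con.sql("INSTALL delta; LOAD delta;")  # delta_scan lives in the delta extension

# Illustrative inputs; duckrun derives these from the lakehouse listing.
table_base_url = "/tmp/lake/Tables/"
tables = [("dbo", "sales"), ("dbo", "customers")]

table_names = []
for schema_name, table_name in tables:
    con.sql(f"CREATE SCHEMA IF NOT EXISTS {schema_name}")
    view_name = f"{schema_name}.{table_name}"
    con.sql(f"""
        CREATE OR REPLACE VIEW {view_name}
        AS SELECT * FROM delta_scan('{table_base_url}{schema_name}/{table_name}');
    """)
    table_names.append(view_name)

print(", ".join(table_names))  # matches the new comma-separated listing
```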
```diff
@@ -570,10 +580,10 @@ class Duckrun:
 
        # Register functions in DuckDB
        try:
-           self.con.create_function("get_workspace_name", get_workspace_name)
-           self.con.create_function("get_lakehouse_name", get_lakehouse_name)
-           self.con.create_function("get_workspace_id_from_name", get_workspace_id_from_name)
-           self.con.create_function("get_lakehouse_id_from_name", get_lakehouse_id_from_name)
+           self.con.create_function("get_workspace_name", get_workspace_name, null_handling='SPECIAL')
+           self.con.create_function("get_lakehouse_name", get_lakehouse_name, null_handling='SPECIAL')
+           self.con.create_function("get_workspace_id_from_name", get_workspace_id_from_name, null_handling='SPECIAL')
+           self.con.create_function("get_lakehouse_id_from_name", get_lakehouse_id_from_name, null_handling='SPECIAL')
        except Exception as e:
            print(f"⚠️ Warning: Could not register lookup functions: {e}")
 
```
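The only change here opts the lookup UDFs into DuckDB's special null handling: with `null_handling='SPECIAL'`, a NULL argument reaches the Python function instead of short-circuiting the whole call to NULL. A minimal standalone sketch of the mechanism (function name and values are illustrative, not duckrun APIs):

```python
import duckdb
from duckdb.typing import VARCHAR

def name_or_default(name):
    # Called even for NULL input (None) because of null_handling='SPECIAL';
    # with default handling, DuckDB would return NULL without calling us.
    return "unknown" if name is None else name

con = duckdb.connect()
con.create_function("name_or_default", name_or_default,
                    [VARCHAR], VARCHAR, null_handling='SPECIAL')
print(con.sql("SELECT name_or_default(NULL), name_or_default('ws1')").fetchall())
# -> [('unknown', 'ws1')]
```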