duckrun 0.2.11.dev0__py3-none-any.whl → 0.2.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
duckrun/auth.py CHANGED
@@ -20,7 +20,6 @@ def get_token() -> Optional[str]:
     # Check if we already have a cached token
     token_env = os.environ.get("AZURE_STORAGE_TOKEN")
     if token_env and token_env != "PLACEHOLDER_TOKEN_TOKEN_NOT_AVAILABLE":
-        print("✅ Using existing Azure Storage token")
        return token_env
 
     print("🔐 Starting Azure authentication...")
@@ -38,21 +37,16 @@ def get_token() -> Optional[str]:
     except Exception as e:
         print(f"⚠️ Fabric notebook authentication failed: {e}")
 
-    # Detect environment type for fallback authentication
+    # Try local/VS Code authentication (Azure CLI + browser)
+    print("🖥️ Trying local authentication (Azure CLI + browser fallback)...")
+    token = _get_local_token()
+    if token:
+        return token
+
+    # If local auth failed, fall back to device code flow
+    print("🔐 Falling back to device code flow for remote/headless environment...")
     try:
-        # Check if we're in Google Colab first
-        try:
-            import google.colab
-            print("🚀 Google Colab detected - using device code flow")
-            return _get_device_code_token()
-        except ImportError:
-            pass
-
-        # For all other environments (including VS Code), try Azure CLI first
-        # This includes local development, VS Code notebooks, etc.
-        print("🖥️ Local/VS Code environment detected - trying Azure CLI first, then browser fallback")
-        return _get_local_token()
-
+        return _get_device_code_token()
     except Exception as e:
         print(f"❌ Authentication failed: {e}")
         print("💡 Try refreshing and running again, or check your Azure permissions")
duckrun/core.py CHANGED
@@ -133,11 +133,8 @@ class Duckrun:
 
         # Check if it's a workspace-only connection (no "/" means workspace name only)
         if "/" not in connection_string:
-            print(f"Connecting to workspace '{connection_string}' for management operations...")
             return WorkspaceConnection(connection_string)
 
-        print("Connecting to Lakehouse...")
-
         scan_all_schemas = False
 
         # Parse lakehouse connection string: "ws/lh.lakehouse/schema" or "ws/lh.lakehouse"
@@ -195,17 +192,14 @@ class Duckrun:
         guid_pattern = re.compile(r'^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$', re.IGNORECASE)
 
         if guid_pattern.match(workspace_name) and guid_pattern.match(lakehouse_name):
-            print(f"✅ Names are already GUIDs: workspace={workspace_name}, lakehouse={lakehouse_name}")
             return workspace_name, lakehouse_name
 
         # Optimization: If workspace name has no spaces, use both names directly (old behavior)
         # Note: Lakehouse names cannot contain spaces in Microsoft Fabric, only workspace names can
         if " " not in workspace_name:
-            print(f"✅ Using names directly (workspace has no spaces): workspace={workspace_name}, lakehouse={lakehouse_name}")
             return workspace_name, lakehouse_name
 
         # Workspace name contains spaces - need to resolve both to GUIDs for proper ABFSS URLs
-        print(f"🔍 Resolving '{workspace_name}' workspace and '{lakehouse_name}' lakehouse to GUIDs (workspace has spaces)...")
 
         try:
             # Get authentication token using enhanced auth system
@@ -242,7 +236,6 @@ class Duckrun:
             if not lakehouse_id:
                 raise ValueError(f"Lakehouse '{lakehouse_name}' not found in workspace '{workspace_name}'")
 
-            print(f"✅ Resolved: {workspace_name} → {workspace_id}, {lakehouse_name} → {lakehouse_id}")
             return workspace_id, lakehouse_id
 
         except Exception as e:
@@ -388,7 +381,6 @@ class Duckrun:
                     tables_found.append((schema_name, table_name))
         else:
             # Scan specific schema only
-            print(f"🔍 Discovering tables in schema '{self.schema}'...")
             schema_path = f"{base_path}{self.schema}/"
             result = obs.list_with_delimiter(store, prefix=schema_path)
 
@@ -407,27 +399,10 @@ class Duckrun:
             tables = self._discover_tables_fast()
 
             if not tables:
-                if self.scan_all_schemas:
-                    print(f"No Delta tables found in {self.lakehouse_name}/Tables/")
-                else:
-                    print(f"No Delta tables found in {self.lakehouse_name}/Tables/{self.schema}/")
                 return
 
-            # Group tables by schema for display
-            schema_tables = {}
-            for schema_name, table_name in tables:
-                if schema_name not in schema_tables:
-                    schema_tables[schema_name] = []
-                schema_tables[schema_name].append(table_name)
-
-            # Display tables by schema
-            print(f"\n📊 Found {len(tables)} tables:")
-            for schema_name in sorted(schema_tables.keys()):
-                table_list = sorted(schema_tables[schema_name])
-                print(f" {schema_name}: {', '.join(table_list)}")
-
-            attached_count = 0
-            skipped_tables = []
+            # Collect table names for display
+            table_names = []
 
             for schema_name, table_name in tables:
                 try:
@@ -435,28 +410,25 @@ class Duckrun:
                         # Create proper schema.table structure in DuckDB
                         self.con.sql(f"CREATE SCHEMA IF NOT EXISTS {schema_name}")
                         view_name = f"{schema_name}.{table_name}"
+                        table_names.append(view_name)
                     else:
                         # Single schema mode - use just table name
                         view_name = table_name
+                        table_names.append(table_name)
 
                     self.con.sql(f"""
                         CREATE OR REPLACE VIEW {view_name}
                         AS SELECT * FROM delta_scan('{self.table_base_url}{schema_name}/{table_name}');
                     """)
-                    attached_count += 1
                 except Exception as e:
-                    skipped_tables.append(f"{schema_name}.{table_name}")
                     continue
 
-            print(f"\n{'='*60}")
-            print(f"✅ Ready - {attached_count}/{len(tables)} tables available")
-            if skipped_tables:
-                print(f"⚠ Skipped {len(skipped_tables)} tables: {', '.join(skipped_tables[:3])}{'...' if len(skipped_tables) > 3 else ''}")
-            print(f"{'='*60}\n")
+            # Print discovered tables as comma-separated list
+            if table_names:
+                print(", ".join(table_names))
 
         except Exception as e:
             print(f"❌ Error attaching lakehouse: {e}")
-            print("Continuing without pre-attached tables.")
 
     def _register_lookup_functions(self):
         """
@@ -599,7 +571,6 @@ class Duckrun:
             self.con.create_function("get_lakehouse_name", get_lakehouse_name)
             self.con.create_function("get_workspace_id_from_name", get_workspace_id_from_name)
             self.con.create_function("get_lakehouse_id_from_name", get_lakehouse_id_from_name)
-            print("✅ Registered lookup functions: get_workspace_name, get_lakehouse_name, get_workspace_id_from_name, get_lakehouse_id_from_name")
         except Exception as e:
             print(f"⚠️ Warning: Could not register lookup functions: {e}")
 
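For context on the `_register_lookup_functions` hunk above: duckrun registers these lookups as DuckDB scalar UDFs via `con.create_function`, with the caching and NULL-on-error semantics the README describes. A minimal, self-contained sketch of that pattern, assuming duckdb>=1.2.2; `resolve_workspace_name` and the sample GUID are hypothetical stand-ins for duckrun's Fabric REST API lookup:

```python
import duckdb
from duckdb.typing import VARCHAR
from typing import Optional

_FAKE_WORKSPACES = {"11111111-2222-3333-4444-555555555555": "Analytics"}
_cache: dict = {}

def resolve_workspace_name(workspace_id: str) -> str:
    # Hypothetical stand-in for the Fabric REST API call duckrun performs.
    return _FAKE_WORKSPACES[workspace_id]  # raises KeyError for unknown ids

def get_workspace_name(workspace_id: str) -> Optional[str]:
    # Cache results to avoid repeated API calls; store None on failure so
    # the SQL function yields NULL instead of raising an error.
    if workspace_id not in _cache:
        try:
            _cache[workspace_id] = resolve_workspace_name(workspace_id)
        except Exception:
            _cache[workspace_id] = None
    return _cache[workspace_id]

con = duckdb.connect()
con.create_function("get_workspace_name", get_workspace_name, [VARCHAR], VARCHAR)
print(con.sql("SELECT get_workspace_name('11111111-2222-3333-4444-555555555555')").fetchall())
```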
duckrun-0.2.13.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: duckrun
-Version: 0.2.11.dev0
+Version: 0.2.13
 Summary: Lakehouse task runner powered by DuckDB for Microsoft Fabric
 Author: mim
 License: MIT
@@ -10,7 +10,7 @@ Project-URL: Issues, https://github.com/djouallah/duckrun/issues
 Requires-Python: >=3.9
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: duckdb>=1.2.0
+Requires-Dist: duckdb>=1.2.2
 Requires-Dist: deltalake<=0.18.2
 Requires-Dist: requests>=2.28.0
 Requires-Dist: obstore>=0.2.0
@@ -20,7 +20,7 @@ Dynamic: license-file
 
 <img src="https://raw.githubusercontent.com/djouallah/duckrun/main/duckrun.png" width="400" alt="Duckrun">
 
-A helper package for stuff that made my life easier when working with Fabric Python notebooks. Just the things that actually made sense to me - nothing fancy
+A helper package for working with Microsoft Fabric lakehouses - orchestration, SQL queries, and file management powered by DuckDB.
 
 ## Important Notes
 
@@ -28,7 +28,6 @@ A helper package for stuff that made my life easier when working with Fabric Pyt
 - Lakehouse must have a schema (e.g., `dbo`, `sales`, `analytics`)
 - **Workspace names with spaces are fully supported!** ✅
 
-
 **Delta Lake Version:** This package uses an older version of deltalake to maintain row size control capabilities, which is crucial for Power BI performance optimization. The newer Rust-based deltalake versions don't yet support the row group size parameters that are essential for optimal DirectLake performance.
 
 ## What It Does
@@ -40,12 +39,15 @@ It does orchestration, arbitrary SQL statements, and file manipulation. That's i
 ```bash
 pip install duckrun
 ```
-for local usage, Note: When running locally, your internet speed will be the main bottleneck.
+
+For local usage (requires Azure CLI or interactive browser auth):
 
 ```bash
 pip install duckrun[local]
 ```
 
+Note: When running locally, your internet speed will be the main bottleneck.
+
 ## Quick Start
 
 ### Simple Example for New Users
@@ -163,9 +165,6 @@ con.sql("""
     GROUP BY customer_id
 """).write.mode("overwrite").saveAsTable("customer_totals")
 
-# Append mode
-con.sql("SELECT * FROM new_orders").write.mode("append").saveAsTable("orders")
-
 # Schema evolution and partitioning (exact Spark API compatibility)
 con.sql("""
     SELECT
@@ -324,6 +323,73 @@ pipeline = [
 
 ## Advanced Features
 
+### SQL Lookup Functions
+
+Duckrun automatically registers helper functions that allow you to resolve workspace and lakehouse names from GUIDs directly in SQL queries. These are especially useful when working with storage logs or audit data that contains workspace/lakehouse IDs.
+
+**Available Functions:**
+
+```python
+con = duckrun.connect("workspace/lakehouse.lakehouse/dbo")
+
+# ID → Name lookups (most common use case)
+con.sql("""
+    SELECT
+        workspace_id,
+        get_workspace_name(workspace_id) as workspace_name,
+        lakehouse_id,
+        get_lakehouse_name(workspace_id, lakehouse_id) as lakehouse_name
+    FROM storage_logs
+""").show()
+
+# Name → ID lookups (reverse)
+con.sql("""
+    SELECT
+        workspace_name,
+        get_workspace_id_from_name(workspace_name) as workspace_id,
+        lakehouse_name,
+        get_lakehouse_id_from_name(workspace_id, lakehouse_name) as lakehouse_id
+    FROM configuration_table
+""").show()
+```
+
+**Function Reference:**
+
+- `get_workspace_name(workspace_id)` - Convert workspace GUID to display name
+- `get_lakehouse_name(workspace_id, lakehouse_id)` - Convert lakehouse GUID to display name
+- `get_workspace_id_from_name(workspace_name)` - Convert workspace name to GUID
+- `get_lakehouse_id_from_name(workspace_id, lakehouse_name)` - Convert lakehouse name to GUID
+
+**Features:**
+- ✅ **Automatic Caching**: Results are cached to avoid repeated API calls
+- ✅ **NULL on Error**: Returns `NULL` instead of errors for missing or inaccessible items
+- ✅ **Fabric API Integration**: Resolves names using Microsoft Fabric REST API
+- ✅ **Always Available**: Functions are automatically registered on connection
+
+**Example Use Case:**
+
+```python
+# Enrich OneLake storage logs with friendly names
+con = duckrun.connect("Analytics/Monitoring.lakehouse/dbo")
+
+result = con.sql("""
+    SELECT
+        workspace_id,
+        get_workspace_name(workspace_id) as workspace_name,
+        lakehouse_id,
+        get_lakehouse_name(workspace_id, lakehouse_id) as lakehouse_name,
+        operation_name,
+        COUNT(*) as operation_count,
+        SUM(bytes_transferred) as total_bytes
+    FROM onelake_storage_logs
+    WHERE log_date = CURRENT_DATE
+    GROUP BY ALL
+    ORDER BY workspace_name, lakehouse_name
+""").show()
+```
+
+This makes it easy to create human-readable reports from GUID-based log data!
+
 ### Schema Evolution & Partitioning
 
 Handle evolving schemas and optimize query performance with partitioning:
@@ -467,63 +533,6 @@ con = duckrun.connect(
 )
 ```
 
-## File Management API Reference
-
-### `copy(local_folder, remote_folder, file_extensions=None, overwrite=False)`
-
-Upload files from a local folder to OneLake Files section.
-
-**Parameters:**
-- `local_folder` (str): Path to local folder containing files to upload
-- `remote_folder` (str): **Required** target folder path in OneLake Files
-- `file_extensions` (list, optional): Filter by file extensions (e.g., `['.csv', '.parquet']`)
-- `overwrite` (bool, optional): Whether to overwrite existing files (default: False)
-
-**Returns:** `True` if all files uploaded successfully, `False` otherwise
-
-**Examples:**
-```python
-# Upload all files to a target folder
-con.copy("./data", "processed_data")
-
-# Upload only CSV and Parquet files
-con.copy("./reports", "monthly_reports", ['.csv', '.parquet'])
-
-# Upload with overwrite enabled
-con.copy("./backup", "daily_backup", overwrite=True)
-```
-
-### `download(remote_folder="", local_folder="./downloaded_files", file_extensions=None, overwrite=False)`
-
-Download files from OneLake Files section to a local folder.
-
-**Parameters:**
-- `remote_folder` (str, optional): Source folder path in OneLake Files (default: root)
-- `local_folder` (str, optional): Local destination folder (default: "./downloaded_files")
-- `file_extensions` (list, optional): Filter by file extensions (e.g., `['.csv', '.json']`)
-- `overwrite` (bool, optional): Whether to overwrite existing local files (default: False)
-
-**Returns:** `True` if all files downloaded successfully, `False` otherwise
-
-**Examples:**
-```python
-# Download all files from OneLake Files root
-con.download()
-
-# Download from specific folder
-con.download("processed_data", "./local_data")
-
-# Download only JSON files
-con.download("config", "./configs", ['.json'])
-```
-
-**Important Notes:**
-- Files are uploaded/downloaded to/from the **OneLake Files section**, not Delta Tables
-- The `remote_folder` parameter is **required** for uploads to prevent accidental uploads
-- Both methods default to `overwrite=False` for safety
-- Folder structure is preserved during upload/download operations
-- Progress is reported with file names, sizes, and upload/download status
-
 ## Complete Example
 
 ```python
duckrun-0.2.13.dist-info/RECORD ADDED
@@ -0,0 +1,14 @@
+duckrun/__init__.py,sha256=cTj6KQ6hKmgu1z7k9nhDcO5lct049luxjx1V0QnymCo,235
+duckrun/auth.py,sha256=dMqIzozgEQ5v7Uc3Mb_OoFZGmsAq0m-VOoYCVL7rehc,9281
+duckrun/core.py,sha256=C5nnL-MheBfJPcw-Jr8t14jsm2iwMF07cYm8g_AXtFQ,52303
+duckrun/files.py,sha256=Fvdjg3DyHJzIVzKo8M_j-eGz4zU61lOB38Y_onbQJkI,10137
+duckrun/lakehouse.py,sha256=j--Z3zo8AOWt1GF9VzRosmmTAy6ey2D0LVubti58twU,14109
+duckrun/runner.py,sha256=yrDxfy1RVkb8iK9GKGmIFZHzCvcO_0GVQlbng7Vw_iM,14171
+duckrun/semantic_model.py,sha256=obzlN2-dbEW3JmDop-vrZGGGLi9u3ThhTbgtDjou7uY,29509
+duckrun/stats.py,sha256=oKIjZ7u5cFVT63FuOl5UqoDsOG3098woSCn-uI6i_sQ,11084
+duckrun/writer.py,sha256=svUuPCYOhrz299NgnpTKhARKjfej0PxnoND2iPDSypk,8098
+duckrun-0.2.13.dist-info/licenses/LICENSE,sha256=-DeQQwdbCbkB4507ZF3QbocysB-EIjDtaLexvqRkGZc,1083
+duckrun-0.2.13.dist-info/METADATA,sha256=0r-l8dWnd8KLBGj7cspK53eUdaDeUG-iHsa74rGBaCo,20766
+duckrun-0.2.13.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+duckrun-0.2.13.dist-info/top_level.txt,sha256=BknMEwebbUHrVAp3SC92ps8MPhK7XSYsaogTvi_DmEU,8
+duckrun-0.2.13.dist-info/RECORD,,
duckrun-0.2.11.dev0.dist-info/RECORD DELETED
@@ -1,14 +0,0 @@
-duckrun/__init__.py,sha256=cTj6KQ6hKmgu1z7k9nhDcO5lct049luxjx1V0QnymCo,235
-duckrun/auth.py,sha256=qPaLQ7InlV9leA9r6E6VEeYavFFoBi0zSN8m_l1aoQs,9545
-duckrun/core.py,sha256=MlaHOOz9bg3-EDXR3C4pEcp75QsnEcbTOmvsMjomLKc,54279
-duckrun/files.py,sha256=Fvdjg3DyHJzIVzKo8M_j-eGz4zU61lOB38Y_onbQJkI,10137
-duckrun/lakehouse.py,sha256=j--Z3zo8AOWt1GF9VzRosmmTAy6ey2D0LVubti58twU,14109
-duckrun/runner.py,sha256=yrDxfy1RVkb8iK9GKGmIFZHzCvcO_0GVQlbng7Vw_iM,14171
-duckrun/semantic_model.py,sha256=obzlN2-dbEW3JmDop-vrZGGGLi9u3ThhTbgtDjou7uY,29509
-duckrun/stats.py,sha256=oKIjZ7u5cFVT63FuOl5UqoDsOG3098woSCn-uI6i_sQ,11084
-duckrun/writer.py,sha256=svUuPCYOhrz299NgnpTKhARKjfej0PxnoND2iPDSypk,8098
-duckrun-0.2.11.dev0.dist-info/licenses/LICENSE,sha256=-DeQQwdbCbkB4507ZF3QbocysB-EIjDtaLexvqRkGZc,1083
-duckrun-0.2.11.dev0.dist-info/METADATA,sha256=NuW94zw7gizsp_cVPFktiC-I9aMP8O37vrtfMq7cmiI,20629
-duckrun-0.2.11.dev0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-duckrun-0.2.11.dev0.dist-info/top_level.txt,sha256=BknMEwebbUHrVAp3SC92ps8MPhK7XSYsaogTvi_DmEU,8
-duckrun-0.2.11.dev0.dist-info/RECORD,,