PyPI - duckrun - Versions diffs - 0.2.3__tar.gz → 0.2.4__tar.gz - Mend

duckrun 0.2.3.tar.gz → 0.2.4.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

{duckrun-0.2.3 → duckrun-0.2.4}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: duckrun
-Version: 0.2.3
+Version: 0.2.4
 Summary: Lakehouse task runner powered by DuckDB for Microsoft Fabric
 Author: mim
 License: MIT

{duckrun-0.2.3 → duckrun-0.2.4}/duckrun/core.py RENAMED Viewed

@@ -100,8 +100,6 @@ class Duckrun:
             workspace, lakehouse_name = parts
             scan_all_schemas = True
             schema = "dbo"
-            print(f"ℹ️  No schema specified. Using default schema 'dbo' for operations.")
-            print(f"   Scanning all schemas for table discovery...\n")
         elif len(parts) == 3:
             workspace, lakehouse_name, schema = parts
         else:
@@ -162,16 +160,13 @@ class Duckrun:
         if self.scan_all_schemas:
             # Discover all schemas first
-            print("🔍 Discovering schemas...")
             schemas_result = obs.list_with_delimiter(store, prefix=base_path)
             schemas = [
                 prefix.rstrip('/').split('/')[-1]
                 for prefix in schemas_result['common_prefixes']
             ]
-            print(f"   Found {len(schemas)} schemas: {', '.join(schemas)}\n")
             # Discover tables in each schema
-            print("🔍 Discovering tables...")
             for schema_name in schemas:
                 schema_path = f"{base_path}{schema_name}/"
                 result = obs.list_with_delimiter(store, prefix=schema_path)
@@ -208,9 +203,22 @@ class Duckrun:
                     print(f"No Delta tables found in {self.lakehouse_name}.Lakehouse/Tables/{self.schema}/")
                 return
-            print(f"\n📊 Found {len(tables)} Delta tables. Attaching as views...\n")
+            # Group tables by schema for display
+            schema_tables = {}
+            for schema_name, table_name in tables:
+                if schema_name not in schema_tables:
+                    schema_tables[schema_name] = []
+                schema_tables[schema_name].append(table_name)
+            # Display tables by schema
+            print(f"\n📊 Found {len(tables)} tables:")
+            for schema_name in sorted(schema_tables.keys()):
+                table_list = sorted(schema_tables[schema_name])
+                print(f"   {schema_name}: {', '.join(table_list)}")
             attached_count = 0
+            skipped_tables = []
             for schema_name, table_name in tables:
                 try:
                     if self.scan_all_schemas:
@@ -225,19 +233,16 @@ class Duckrun:
                         CREATE OR REPLACE VIEW {view_name}
                         AS SELECT * FROM delta_scan('{self.table_base_url}{schema_name}/{table_name}');
                     """)
-                    print(f"  ✓ Attached: {schema_name}.{table_name} → {view_name}")
                     attached_count += 1
                 except Exception as e:
-                    print(f"  ⚠ Skipped {schema_name}.{table_name}: {str(e)[:100]}")
+                    skipped_tables.append(f"{schema_name}.{table_name}")
                     continue
             print(f"\n{'='*60}")
-            print(f"✅ Successfully attached {attached_count}/{len(tables)} tables")
+            print(f"✅ Ready - {attached_count}/{len(tables)} tables available")
+            if skipped_tables:
+                print(f"⚠ Skipped {len(skipped_tables)} tables: {', '.join(skipped_tables[:3])}{'...' if len(skipped_tables) > 3 else ''}")
             print(f"{'='*60}\n")
-            if self.scan_all_schemas:
-                print(f"\n💡 Note: Tables use schema.table format (e.g., aemo.calendar, dbo.results)")
-                print(f"   Default schema for operations: {self.schema}\n")
         except Exception as e:
             print(f"❌ Error attaching lakehouse: {e}")

{duckrun-0.2.3 → duckrun-0.2.4}/duckrun/stats.py RENAMED Viewed

@@ -21,33 +21,39 @@ def _table_exists(duckrun_instance, schema_name: str, table_name: str) -> bool:
 def _schema_exists(duckrun_instance, schema_name: str) -> bool:
-    """Check if a schema exists by trying to show its tables."""
+    """Check if a schema exists by querying information_schema."""
     try:
-        # For main schema, just show tables
+        # For main schema, always exists
         if schema_name == "main":
-            query = "SHOW TABLES"
+            return True
         else:
-            query = f"SHOW TABLES FROM {schema_name}"
-        duckrun_instance.con.execute(query)
-        return True
+            # Use information_schema which works in DuckDB 1.2.2
+            query = f"SELECT 1 FROM information_schema.schemata WHERE schema_name = '{schema_name}' LIMIT 1"
+            result = duckrun_instance.con.execute(query).fetchall()
+            return len(result) > 0
     except:
         return False
 def _get_existing_tables_in_schema(duckrun_instance, schema_name: str) -> list:
-    """Get all existing tables in a schema by showing tables, excluding temporary tables."""
+    """Get all existing tables in a schema using information_schema, excluding temporary tables."""
     try:
-        # For main schema, just show tables
+        # For main schema, use SHOW TABLES
         if schema_name == "main":
             query = "SHOW TABLES"
+            result = duckrun_instance.con.execute(query).fetchall()
+            if result:
+                tables = [row[0] for row in result]
+                filtered_tables = [tbl for tbl in tables if not tbl.startswith('tbl_')]
+                return filtered_tables
         else:
-            query = f"SHOW TABLES FROM {schema_name}"
-        result = duckrun_instance.con.execute(query).fetchall()
-        if result:
-            # Filter out temporary tables created by stats processing (tbl_0, tbl_1, etc.)
-            tables = [row[0] for row in result]
-            filtered_tables = [tbl for tbl in tables if not tbl.startswith('tbl_')]
-            return filtered_tables
+            # Use information_schema which works in DuckDB 1.2.2
+            query = f"SELECT table_name FROM information_schema.tables WHERE table_schema = '{schema_name}'"
+            result = duckrun_instance.con.execute(query).fetchall()
+            if result:
+                tables = [row[0] for row in result]
+                filtered_tables = [tbl for tbl in tables if not tbl.startswith('tbl_')]
+                return filtered_tables
         return []
     except:
         return []
@@ -218,7 +224,7 @@ def get_stats(duckrun_instance, source: str):
         WHERE tbl IS NOT NULL
         GROUP BY tbl
         ORDER BY total_rows DESC
-    ''').fetch_arrow_table()
+    ''').df()
     return final_result

{duckrun-0.2.3 → duckrun-0.2.4}/duckrun.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: duckrun
-Version: 0.2.3
+Version: 0.2.4
 Summary: Lakehouse task runner powered by DuckDB for Microsoft Fabric
 Author: mim
 License: MIT

{duckrun-0.2.3 → duckrun-0.2.4}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "duckrun"
-version = "0.2.3"
+version = "0.2.4"
 description = "Lakehouse task runner powered by DuckDB for Microsoft Fabric"
 readme = "README.md"
 license = {text = "MIT"}