duckrun 0.2.18.dev1__tar.gz → 0.2.18.dev2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of duckrun might be problematic. Click here for more details.
- {duckrun-0.2.18.dev1 → duckrun-0.2.18.dev2}/PKG-INFO +1 -1
- {duckrun-0.2.18.dev1 → duckrun-0.2.18.dev2}/duckrun/__init__.py +1 -1
- {duckrun-0.2.18.dev1 → duckrun-0.2.18.dev2}/duckrun/core.py +6 -2
- {duckrun-0.2.18.dev1 → duckrun-0.2.18.dev2}/duckrun/semantic_model.py +32 -8
- {duckrun-0.2.18.dev1 → duckrun-0.2.18.dev2}/duckrun/stats.py +10 -2
- {duckrun-0.2.18.dev1 → duckrun-0.2.18.dev2}/duckrun.egg-info/PKG-INFO +1 -1
- {duckrun-0.2.18.dev1 → duckrun-0.2.18.dev2}/pyproject.toml +1 -1
- {duckrun-0.2.18.dev1 → duckrun-0.2.18.dev2}/LICENSE +0 -0
- {duckrun-0.2.18.dev1 → duckrun-0.2.18.dev2}/README.md +0 -0
- {duckrun-0.2.18.dev1 → duckrun-0.2.18.dev2}/duckrun/auth.py +0 -0
- {duckrun-0.2.18.dev1 → duckrun-0.2.18.dev2}/duckrun/files.py +0 -0
- {duckrun-0.2.18.dev1 → duckrun-0.2.18.dev2}/duckrun/lakehouse.py +0 -0
- {duckrun-0.2.18.dev1 → duckrun-0.2.18.dev2}/duckrun/notebook.py +0 -0
- {duckrun-0.2.18.dev1 → duckrun-0.2.18.dev2}/duckrun/runner.py +0 -0
- {duckrun-0.2.18.dev1 → duckrun-0.2.18.dev2}/duckrun/writer.py +0 -0
- {duckrun-0.2.18.dev1 → duckrun-0.2.18.dev2}/duckrun.egg-info/SOURCES.txt +0 -0
- {duckrun-0.2.18.dev1 → duckrun-0.2.18.dev2}/duckrun.egg-info/dependency_links.txt +0 -0
- {duckrun-0.2.18.dev1 → duckrun-0.2.18.dev2}/duckrun.egg-info/requires.txt +0 -0
- {duckrun-0.2.18.dev1 → duckrun-0.2.18.dev2}/duckrun.egg-info/top_level.txt +0 -0
- {duckrun-0.2.18.dev1 → duckrun-0.2.18.dev2}/setup.cfg +0 -0
|
@@ -1035,12 +1035,13 @@ class Duckrun(WorkspaceOperationsMixin):
|
|
|
1035
1035
|
"""Get underlying DuckDB connection"""
|
|
1036
1036
|
return self.con
|
|
1037
1037
|
|
|
1038
|
-
def get_stats(self, source: str):
|
|
1038
|
+
def get_stats(self, source: str = None):
|
|
1039
1039
|
"""
|
|
1040
1040
|
Get comprehensive statistics for Delta Lake tables.
|
|
1041
1041
|
|
|
1042
1042
|
Args:
|
|
1043
|
-
source: Can be one of:
|
|
1043
|
+
source: Optional. Can be one of:
|
|
1044
|
+
- None: Use all tables in the connection's schema (default)
|
|
1044
1045
|
- Table name: 'table_name' (uses current schema)
|
|
1045
1046
|
- Schema.table: 'schema.table_name' (specific table in schema)
|
|
1046
1047
|
- Schema only: 'schema' (all tables in schema)
|
|
@@ -1052,6 +1053,9 @@ class Duckrun(WorkspaceOperationsMixin):
|
|
|
1052
1053
|
Examples:
|
|
1053
1054
|
con = duckrun.connect("tmp/data.lakehouse/aemo")
|
|
1054
1055
|
|
|
1056
|
+
# All tables in current schema (aemo)
|
|
1057
|
+
stats = con.get_stats()
|
|
1058
|
+
|
|
1055
1059
|
# Single table in current schema
|
|
1056
1060
|
stats = con.get_stats('price')
|
|
1057
1061
|
|
|
@@ -130,13 +130,24 @@ def check_dataset_exists(dataset_name, workspace_id, client):
|
|
|
130
130
|
|
|
131
131
|
|
|
132
132
|
def refresh_dataset(dataset_name, workspace_id, client, dataset_id=None):
|
|
133
|
-
"""Refresh a dataset and monitor progress using Power BI API
|
|
133
|
+
"""Refresh a dataset and monitor progress using Power BI API
|
|
134
|
+
|
|
135
|
+
For DirectLake models, performs a two-step refresh:
|
|
136
|
+
1. clearValues - Purges data from memory
|
|
137
|
+
2. full - Reframes data from Delta tables
|
|
138
|
+
"""
|
|
134
139
|
|
|
135
140
|
# If dataset_id not provided, look it up by name
|
|
136
141
|
if not dataset_id:
|
|
137
142
|
dataset_id = get_dataset_id(dataset_name, workspace_id, client)
|
|
138
143
|
|
|
139
|
-
|
|
144
|
+
# Use Power BI API for refresh (not Fabric API)
|
|
145
|
+
powerbi_url = f"https://api.powerbi.com/v1.0/myorg/datasets/{dataset_id}/refreshes"
|
|
146
|
+
headers = client._get_headers()
|
|
147
|
+
|
|
148
|
+
# Step 1: clearValues - Purge data from memory
|
|
149
|
+
print(" Step 1: Clearing values from memory...")
|
|
150
|
+
clearvalues_payload = {
|
|
140
151
|
"type": "clearValues",
|
|
141
152
|
"commitMode": "transactional",
|
|
142
153
|
"maxParallelism": 10,
|
|
@@ -144,11 +155,24 @@ def refresh_dataset(dataset_name, workspace_id, client, dataset_id=None):
|
|
|
144
155
|
"objects": []
|
|
145
156
|
}
|
|
146
157
|
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
158
|
+
response = requests.post(powerbi_url, headers=headers, json=clearvalues_payload)
|
|
159
|
+
|
|
160
|
+
if response.status_code in [200, 202]:
|
|
161
|
+
print(" ✓ Clear values completed")
|
|
162
|
+
else:
|
|
163
|
+
response.raise_for_status()
|
|
164
|
+
|
|
165
|
+
# Step 2: full refresh - Reframe data from Delta tables
|
|
166
|
+
print(" Step 2: Full refresh to reframe data...")
|
|
167
|
+
full_payload = {
|
|
168
|
+
"type": "full",
|
|
169
|
+
"commitMode": "transactional",
|
|
170
|
+
"maxParallelism": 10,
|
|
171
|
+
"retryCount": 2,
|
|
172
|
+
"objects": []
|
|
173
|
+
}
|
|
150
174
|
|
|
151
|
-
response = requests.post(powerbi_url, headers=headers, json=
|
|
175
|
+
response = requests.post(powerbi_url, headers=headers, json=full_payload)
|
|
152
176
|
|
|
153
177
|
if response.status_code in [200, 202]:
|
|
154
178
|
print(f"✓ Refresh initiated")
|
|
@@ -471,13 +495,13 @@ def deploy_semantic_model(workspace_name_or_id, lakehouse_name_or_id, schema_nam
|
|
|
471
495
|
dataset_exists = check_dataset_exists(dataset_name, workspace_id, client)
|
|
472
496
|
|
|
473
497
|
if dataset_exists:
|
|
474
|
-
print(f"
|
|
498
|
+
print(f"✓ Dataset '{dataset_name}' already exists - skipping deployment")
|
|
475
499
|
|
|
476
500
|
if wait_seconds > 0:
|
|
477
501
|
print(f" Waiting {wait_seconds} seconds...")
|
|
478
502
|
time.sleep(wait_seconds)
|
|
479
503
|
|
|
480
|
-
print("\n[Step
|
|
504
|
+
print("\n[Step 3/3] Refreshing existing semantic model...")
|
|
481
505
|
refresh_dataset(dataset_name, workspace_id, client)
|
|
482
506
|
|
|
483
507
|
print("\n" + "=" * 70)
|
|
@@ -60,13 +60,14 @@ def _get_existing_tables_in_schema(duckrun_instance, schema_name: str) -> list:
|
|
|
60
60
|
return []
|
|
61
61
|
|
|
62
62
|
|
|
63
|
-
def get_stats(duckrun_instance, source: str):
|
|
63
|
+
def get_stats(duckrun_instance, source: str = None):
|
|
64
64
|
"""
|
|
65
65
|
Get comprehensive statistics for Delta Lake tables.
|
|
66
66
|
|
|
67
67
|
Args:
|
|
68
68
|
duckrun_instance: The Duckrun connection instance
|
|
69
|
-
source: Can be one of:
|
|
69
|
+
source: Optional. Can be one of:
|
|
70
|
+
- None: Use all tables in the connection's schema (default)
|
|
70
71
|
- Table name: 'table_name' (uses main schema in DuckDB)
|
|
71
72
|
- Schema.table: 'schema.table_name' (specific table in schema, if multi-schema)
|
|
72
73
|
- Schema only: 'schema' (all tables in schema, if multi-schema)
|
|
@@ -78,6 +79,9 @@ def get_stats(duckrun_instance, source: str):
|
|
|
78
79
|
Examples:
|
|
79
80
|
con = duckrun.connect("tmp/data.lakehouse/test")
|
|
80
81
|
|
|
82
|
+
# All tables in the connection's schema
|
|
83
|
+
stats = con.get_stats()
|
|
84
|
+
|
|
81
85
|
# Single table in main schema (DuckDB uses 'main', not 'test')
|
|
82
86
|
stats = con.get_stats('price_today')
|
|
83
87
|
|
|
@@ -93,6 +97,10 @@ def get_stats(duckrun_instance, source: str):
|
|
|
93
97
|
duckdb_schema = "main"
|
|
94
98
|
url_schema = duckrun_instance.schema # This is from the connection URL path
|
|
95
99
|
|
|
100
|
+
# If source is not provided, default to all tables in the connection's schema
|
|
101
|
+
if source is None:
|
|
102
|
+
source = url_schema
|
|
103
|
+
|
|
96
104
|
# Parse the source and validate existence
|
|
97
105
|
if '.' in source:
|
|
98
106
|
# Format: schema.table - only valid if multi-schema is enabled
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "duckrun"
|
|
7
|
-
version = "0.2.18.dev1"
|
|
7
|
+
version = "0.2.18.dev2"
|
|
8
8
|
description = "Helper library for Fabric Python using duckdb, arrow and delta_rs (orchestration, queries, etc.)"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = {text = "MIT"}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|