duckrun 0.2.18.dev1__tar.gz → 0.2.18.dev2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of duckrun might be problematic. Click here for more details.
- {duckrun-0.2.18.dev1 → duckrun-0.2.18.dev2}/PKG-INFO +1 -1
- {duckrun-0.2.18.dev1 → duckrun-0.2.18.dev2}/duckrun/__init__.py +1 -1
- {duckrun-0.2.18.dev1 → duckrun-0.2.18.dev2}/duckrun/core.py +6 -2
- {duckrun-0.2.18.dev1 → duckrun-0.2.18.dev2}/duckrun/semantic_model.py +32 -8
- {duckrun-0.2.18.dev1 → duckrun-0.2.18.dev2}/duckrun/stats.py +10 -2
- {duckrun-0.2.18.dev1 → duckrun-0.2.18.dev2}/duckrun.egg-info/PKG-INFO +1 -1
- {duckrun-0.2.18.dev1 → duckrun-0.2.18.dev2}/pyproject.toml +1 -1
- {duckrun-0.2.18.dev1 → duckrun-0.2.18.dev2}/LICENSE +0 -0
- {duckrun-0.2.18.dev1 → duckrun-0.2.18.dev2}/README.md +0 -0
- {duckrun-0.2.18.dev1 → duckrun-0.2.18.dev2}/duckrun/auth.py +0 -0
- {duckrun-0.2.18.dev1 → duckrun-0.2.18.dev2}/duckrun/files.py +0 -0
- {duckrun-0.2.18.dev1 → duckrun-0.2.18.dev2}/duckrun/lakehouse.py +0 -0
- {duckrun-0.2.18.dev1 → duckrun-0.2.18.dev2}/duckrun/notebook.py +0 -0
- {duckrun-0.2.18.dev1 → duckrun-0.2.18.dev2}/duckrun/runner.py +0 -0
- {duckrun-0.2.18.dev1 → duckrun-0.2.18.dev2}/duckrun/writer.py +0 -0
- {duckrun-0.2.18.dev1 → duckrun-0.2.18.dev2}/duckrun.egg-info/SOURCES.txt +0 -0
- {duckrun-0.2.18.dev1 → duckrun-0.2.18.dev2}/duckrun.egg-info/dependency_links.txt +0 -0
- {duckrun-0.2.18.dev1 → duckrun-0.2.18.dev2}/duckrun.egg-info/requires.txt +0 -0
- {duckrun-0.2.18.dev1 → duckrun-0.2.18.dev2}/duckrun.egg-info/top_level.txt +0 -0
- {duckrun-0.2.18.dev1 → duckrun-0.2.18.dev2}/setup.cfg +0 -0
|
@@ -1035,12 +1035,13 @@ class Duckrun(WorkspaceOperationsMixin):
|
|
|
1035
1035
|
"""Get underlying DuckDB connection"""
|
|
1036
1036
|
return self.con
|
|
1037
1037
|
|
|
1038
|
-
def get_stats(self, source: str):
|
|
1038
|
+
def get_stats(self, source: str = None):
|
|
1039
1039
|
"""
|
|
1040
1040
|
Get comprehensive statistics for Delta Lake tables.
|
|
1041
1041
|
|
|
1042
1042
|
Args:
|
|
1043
|
-
source: Can be one of:
|
|
1043
|
+
source: Optional. Can be one of:
|
|
1044
|
+
- None: Use all tables in the connection's schema (default)
|
|
1044
1045
|
- Table name: 'table_name' (uses current schema)
|
|
1045
1046
|
- Schema.table: 'schema.table_name' (specific table in schema)
|
|
1046
1047
|
- Schema only: 'schema' (all tables in schema)
|
|
@@ -1052,6 +1053,9 @@ class Duckrun(WorkspaceOperationsMixin):
|
|
|
1052
1053
|
Examples:
|
|
1053
1054
|
con = duckrun.connect("tmp/data.lakehouse/aemo")
|
|
1054
1055
|
|
|
1056
|
+
# All tables in current schema (aemo)
|
|
1057
|
+
stats = con.get_stats()
|
|
1058
|
+
|
|
1055
1059
|
# Single table in current schema
|
|
1056
1060
|
stats = con.get_stats('price')
|
|
1057
1061
|
|
|
@@ -130,13 +130,24 @@ def check_dataset_exists(dataset_name, workspace_id, client):
|
|
|
130
130
|
|
|
131
131
|
|
|
132
132
|
def refresh_dataset(dataset_name, workspace_id, client, dataset_id=None):
|
|
133
|
-
"""Refresh a dataset and monitor progress using Power BI API
|
|
133
|
+
"""Refresh a dataset and monitor progress using Power BI API
|
|
134
|
+
|
|
135
|
+
For DirectLake models, performs a two-step refresh:
|
|
136
|
+
1. clearValues - Purges data from memory
|
|
137
|
+
2. full - Reframes data from Delta tables
|
|
138
|
+
"""
|
|
134
139
|
|
|
135
140
|
# If dataset_id not provided, look it up by name
|
|
136
141
|
if not dataset_id:
|
|
137
142
|
dataset_id = get_dataset_id(dataset_name, workspace_id, client)
|
|
138
143
|
|
|
139
|
-
|
|
144
|
+
# Use Power BI API for refresh (not Fabric API)
|
|
145
|
+
powerbi_url = f"https://api.powerbi.com/v1.0/myorg/datasets/{dataset_id}/refreshes"
|
|
146
|
+
headers = client._get_headers()
|
|
147
|
+
|
|
148
|
+
# Step 1: clearValues - Purge data from memory
|
|
149
|
+
print(" Step 1: Clearing values from memory...")
|
|
150
|
+
clearvalues_payload = {
|
|
140
151
|
"type": "clearValues",
|
|
141
152
|
"commitMode": "transactional",
|
|
142
153
|
"maxParallelism": 10,
|
|
@@ -144,11 +155,24 @@ def refresh_dataset(dataset_name, workspace_id, client, dataset_id=None):
|
|
|
144
155
|
"objects": []
|
|
145
156
|
}
|
|
146
157
|
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
158
|
+
response = requests.post(powerbi_url, headers=headers, json=clearvalues_payload)
|
|
159
|
+
|
|
160
|
+
if response.status_code in [200, 202]:
|
|
161
|
+
print(" ✓ Clear values completed")
|
|
162
|
+
else:
|
|
163
|
+
response.raise_for_status()
|
|
164
|
+
|
|
165
|
+
# Step 2: full refresh - Reframe data from Delta tables
|
|
166
|
+
print(" Step 2: Full refresh to reframe data...")
|
|
167
|
+
full_payload = {
|
|
168
|
+
"type": "full",
|
|
169
|
+
"commitMode": "transactional",
|
|
170
|
+
"maxParallelism": 10,
|
|
171
|
+
"retryCount": 2,
|
|
172
|
+
"objects": []
|
|
173
|
+
}
|
|
150
174
|
|
|
151
|
-
response = requests.post(powerbi_url, headers=headers, json=
|
|
175
|
+
response = requests.post(powerbi_url, headers=headers, json=full_payload)
|
|
152
176
|
|
|
153
177
|
if response.status_code in [200, 202]:
|
|
154
178
|
print(f"✓ Refresh initiated")
|
|
@@ -471,13 +495,13 @@ def deploy_semantic_model(workspace_name_or_id, lakehouse_name_or_id, schema_nam
|
|
|
471
495
|
dataset_exists = check_dataset_exists(dataset_name, workspace_id, client)
|
|
472
496
|
|
|
473
497
|
if dataset_exists:
|
|
474
|
-
print(f"
|
|
498
|
+
print(f"✓ Dataset '{dataset_name}' already exists - skipping deployment")
|
|
475
499
|
|
|
476
500
|
if wait_seconds > 0:
|
|
477
501
|
print(f" Waiting {wait_seconds} seconds...")
|
|
478
502
|
time.sleep(wait_seconds)
|
|
479
503
|
|
|
480
|
-
print("\n[Step
|
|
504
|
+
print("\n[Step 3/3] Refreshing existing semantic model...")
|
|
481
505
|
refresh_dataset(dataset_name, workspace_id, client)
|
|
482
506
|
|
|
483
507
|
print("\n" + "=" * 70)
|
|
@@ -60,13 +60,14 @@ def _get_existing_tables_in_schema(duckrun_instance, schema_name: str) -> list:
|
|
|
60
60
|
return []
|
|
61
61
|
|
|
62
62
|
|
|
63
|
-
def get_stats(duckrun_instance, source: str):
|
|
63
|
+
def get_stats(duckrun_instance, source: str = None):
|
|
64
64
|
"""
|
|
65
65
|
Get comprehensive statistics for Delta Lake tables.
|
|
66
66
|
|
|
67
67
|
Args:
|
|
68
68
|
duckrun_instance: The Duckrun connection instance
|
|
69
|
-
source: Can be one of:
|
|
69
|
+
source: Optional. Can be one of:
|
|
70
|
+
- None: Use all tables in the connection's schema (default)
|
|
70
71
|
- Table name: 'table_name' (uses main schema in DuckDB)
|
|
71
72
|
- Schema.table: 'schema.table_name' (specific table in schema, if multi-schema)
|
|
72
73
|
- Schema only: 'schema' (all tables in schema, if multi-schema)
|
|
@@ -78,6 +79,9 @@ def get_stats(duckrun_instance, source: str):
|
|
|
78
79
|
Examples:
|
|
79
80
|
con = duckrun.connect("tmp/data.lakehouse/test")
|
|
80
81
|
|
|
82
|
+
# All tables in the connection's schema
|
|
83
|
+
stats = con.get_stats()
|
|
84
|
+
|
|
81
85
|
# Single table in main schema (DuckDB uses 'main', not 'test')
|
|
82
86
|
stats = con.get_stats('price_today')
|
|
83
87
|
|
|
@@ -93,6 +97,10 @@ def get_stats(duckrun_instance, source: str):
|
|
|
93
97
|
duckdb_schema = "main"
|
|
94
98
|
url_schema = duckrun_instance.schema # This is from the connection URL path
|
|
95
99
|
|
|
100
|
+
# If source is not provided, default to all tables in the connection's schema
|
|
101
|
+
if source is None:
|
|
102
|
+
source = url_schema
|
|
103
|
+
|
|
96
104
|
# Parse the source and validate existence
|
|
97
105
|
if '.' in source:
|
|
98
106
|
# Format: schema.table - only valid if multi-schema is enabled
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "duckrun"
|
|
7
|
-
version = "0.2.18.dev1"
|
|
7
|
+
version = "0.2.18.dev2"
|
|
8
8
|
description = "Helper library for Fabric Python using duckdb, arrow and delta_rs (orchestration, queries, etc.)"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = {text = "MIT"}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|