duckrun 0.2.18.dev2__py3-none-any.whl → 0.2.19.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- duckrun/__init__.py +1 -1
- duckrun/core.py +28 -14
- duckrun/notebook.py +2 -0
- duckrun/semantic_model.py +115 -13
- duckrun/stats.py +227 -67
- {duckrun-0.2.18.dev2.dist-info → duckrun-0.2.19.dev0.dist-info}/METADATA +1 -1
- duckrun-0.2.19.dev0.dist-info/RECORD +15 -0
- duckrun-0.2.18.dev2.dist-info/RECORD +0 -15
- {duckrun-0.2.18.dev2.dist-info → duckrun-0.2.19.dev0.dist-info}/WHEEL +0 -0
- {duckrun-0.2.18.dev2.dist-info → duckrun-0.2.19.dev0.dist-info}/licenses/LICENSE +0 -0
- {duckrun-0.2.18.dev2.dist-info → duckrun-0.2.19.dev0.dist-info}/top_level.txt +0 -0
duckrun/__init__.py
CHANGED
duckrun/core.py
CHANGED
```diff
@@ -1035,7 +1035,7 @@ class Duckrun(WorkspaceOperationsMixin):
         """Get underlying DuckDB connection"""
         return self.con
 
-    def get_stats(self, source: str = None):
+    def get_stats(self, source: str = None, detailed = False):
         """
         Get comprehensive statistics for Delta Lake tables.
 
@@ -1045,27 +1045,34 @@
                 - Table name: 'table_name' (uses current schema)
                 - Schema.table: 'schema.table_name' (specific table in schema)
                 - Schema only: 'schema' (all tables in schema)
+            detailed: Optional. Controls the level of detail in statistics:
+                - False (default): Aggregated table-level stats
+                - True: Row group level statistics with compression details
 
         Returns:
-            … (2 removed lines not shown in this rendering)
+            DataFrame with statistics based on detailed parameter:
+            - If detailed=False: Aggregated table-level summary
+            - If detailed=True: Granular file and row group level stats
 
         Examples:
             con = duckrun.connect("tmp/data.lakehouse/aemo")
 
-            # All tables in current schema (aemo)
+            # All tables in current schema (aemo) - aggregated
             stats = con.get_stats()
 
-            # Single table in current schema
+            # Single table in current schema - aggregated
             stats = con.get_stats('price')
 
+            # Single table with detailed row group statistics
+            stats_detailed = con.get_stats('price', detailed=True)
+
             # Specific table in different schema
             stats = con.get_stats('aemo.price')
 
             # All tables in a schema
             stats = con.get_stats('aemo')
         """
-        return _get_stats(self, source)
+        return _get_stats(self, source, detailed)
 
     def list_lakehouses(self) -> List[str]:
         """
```
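Taken together, the `get_stats()` changes above mean callers pick the granularity at call time. A minimal sketch of the two call styles, assuming a reachable lakehouse (the connection string is the illustrative one from the docstring, not a real workspace):

```python
import duckrun

# Illustrative connection string from the docstring; substitute your own
# workspace/lakehouse/schema path.
con = duckrun.connect("tmp/data.lakehouse/aemo")

# Default: one aggregated row per table (totals, file count, VORDER, compression).
summary = con.get_stats("price")

# New in this release: one row per parquet row group, with compression ratios.
row_groups = con.get_stats("price", detailed=True)

print(summary.columns.tolist())
print(row_groups[["file_name", "row_group_id", "compression_ratio"]].head())
```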
```diff
@@ -1179,7 +1186,7 @@ class Duckrun(WorkspaceOperationsMixin):
             return False
 
     def deploy(self, bim_url: str, dataset_name: Optional[str] = None,
-               wait_seconds: int = 5) -> int:
+               wait_seconds: int = 5, refresh: str = "full") -> int:
         """
         Deploy a semantic model from a BIM file using DirectLake mode.
 
@@ -1188,8 +1195,11 @@
                 - URL: "https://raw.githubusercontent.com/.../model.bim"
                 - Local file: "model.bim"
                 - Workspace/Model: "workspace_name/model_name"
-            dataset_name: Name for the semantic model (default:
+            dataset_name: Name for the semantic model (default: schema name)
             wait_seconds: Seconds to wait for permission propagation (default: 5)
+            refresh: Refresh strategy:
+                - "full": Clear values and process full refresh (default)
+                - "ignore": Skip refresh entirely
 
         Returns:
             1 for success, 0 for failure
@@ -1197,14 +1207,17 @@
         Examples:
             dr = Duckrun.connect("My Workspace/My Lakehouse.lakehouse/dbo")
 
+            # Deploy with schema name as dataset name (dbo)
+            dr.deploy("https://github.com/.../model.bim")
+
             # Deploy from workspace/model (uses same name by default)
             dr.deploy("Source Workspace/Source Model")  # Creates "Source Model"
 
             # Deploy with custom name
-            dr.deploy("
+            dr.deploy("https://github.com/.../model.bim", dataset_name="Sales Model")
 
-            # Deploy
-            dr.deploy("https://
+            # Deploy without refresh
+            dr.deploy("https://github.com/.../model.bim", refresh="ignore")
         """
         from .semantic_model import deploy_semantic_model
 
@@ -1216,9 +1229,9 @@
             if len(parts) == 2:
                 dataset_name = parts[1]  # Use the model name
             else:
-                dataset_name =
+                dataset_name = self.schema  # Use schema name
         else:
-            dataset_name =
+            dataset_name = self.schema  # Use schema name
 
         # Call the deployment function (DirectLake only)
         return deploy_semantic_model(
@@ -1227,7 +1240,8 @@
             schema_name=self.schema,
             dataset_name=dataset_name,
             bim_url_or_path=bim_url,
-            wait_seconds=wait_seconds
+            wait_seconds=wait_seconds,
+            refresh=refresh
        )
 
     def close(self):
```
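The `deploy()` changes follow the same pattern: the new `refresh` argument is threaded straight through to `deploy_semantic_model()`, and `dataset_name` now falls back to the connection's schema. A short sketch of the call styles the docstring documents (the BIM URLs are the docstring's placeholders, so this is illustrative rather than directly runnable):

```python
import duckrun

dr = duckrun.connect("My Workspace/My Lakehouse.lakehouse/dbo")

# Dataset name defaults to the schema ("dbo") when deploying from a BIM file.
dr.deploy("https://raw.githubusercontent.com/.../model.bim")

# Custom dataset name; default refresh="full" runs clearValues + full reframe.
dr.deploy("https://raw.githubusercontent.com/.../model.bim", dataset_name="Sales Model")

# Deploy only; skip the post-deployment refresh entirely.
dr.deploy("https://raw.githubusercontent.com/.../model.bim", refresh="ignore")
```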
duckrun/notebook.py
CHANGED
```diff
@@ -160,6 +160,7 @@ def import_notebook_from_web(
         update_url = f"{base_url}/workspaces/{workspace_id}/notebooks/{notebook_id}/updateDefinition"
         payload = {
             "definition": {
+                "format": "ipynb",
                 "parts": [
                     {
                         "path": "notebook-content.py",
@@ -192,6 +193,7 @@ def import_notebook_from_web(
         payload = {
             "displayName": notebook_name,
             "definition": {
+                "format": "ipynb",
                 "parts": [
                     {
                         "path": "notebook-content.py",
```
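Both the update and create paths now declare `"format": "ipynb"` on the notebook definition. A minimal sketch of the resulting payload shape; the part fields beyond `path` are not visible in this diff, so the base64 `payload`/`payloadType` pair below is an assumption based on the usual Fabric definition-parts convention:

```python
import base64

notebook_source = "# Fabric notebook source\nprint('hello')"

payload = {
    "displayName": "My Notebook",   # only present on the create path, per the second hunk
    "definition": {
        "format": "ipynb",          # the field added in this release
        "parts": [
            {
                "path": "notebook-content.py",
                # Assumed, not shown in this diff: inline base64 content.
                "payload": base64.b64encode(notebook_source.encode()).decode(),
                "payloadType": "InlineBase64",
            }
        ],
    },
}
```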
duckrun/semantic_model.py
CHANGED
```diff
@@ -129,14 +129,21 @@ def check_dataset_exists(dataset_name, workspace_id, client):
         return False
 
 
-def refresh_dataset(dataset_name, workspace_id, client, dataset_id=None):
+def refresh_dataset(dataset_name, workspace_id, client, dataset_id=None, refresh="full"):
     """Refresh a dataset and monitor progress using Power BI API
 
-    For DirectLake models, performs
-    … (2 removed lines not shown in this rendering)
+    For DirectLake models, performs refresh based on refresh parameter:
+    - refresh="full": Two-step refresh (clearValues + full reframe)
+    - refresh="ignore": Skip refresh entirely
+
+    If a refresh is already in progress, waits for it to complete before starting a new one.
     """
 
+    # Skip refresh entirely if refresh is "ignore"
+    if refresh == "ignore":
+        print("  Ignoring refresh - skipping refresh")
+        return
+
     # If dataset_id not provided, look it up by name
     if not dataset_id:
         dataset_id = get_dataset_id(dataset_name, workspace_id, client)
@@ -145,6 +152,46 @@ def refresh_dataset(dataset_name, workspace_id, client, dataset_id=None):
     powerbi_url = f"https://api.powerbi.com/v1.0/myorg/datasets/{dataset_id}/refreshes"
     headers = client._get_headers()
 
+    # Check for in-progress refreshes
+    print("  Checking for in-progress refreshes...")
+    try:
+        status_response = requests.get(f"{powerbi_url}?$top=1", headers=headers)
+        if status_response.status_code == 200:
+            refreshes = status_response.json().get('value', [])
+            if refreshes:
+                latest_refresh = refreshes[0]
+                status = latest_refresh.get('status')
+                if status in ['InProgress', 'Unknown']:
+                    refresh_id = latest_refresh.get('requestId')
+                    print(f"  ⚠️ Found in-progress refresh (ID: {refresh_id})")
+                    print(f"  Waiting for current refresh to complete...")
+
+                    # Wait for the in-progress refresh to complete
+                    max_wait_attempts = 60
+                    for attempt in range(max_wait_attempts):
+                        time.sleep(5)
+                        check_response = requests.get(f"{powerbi_url}/{refresh_id}", headers=headers)
+                        if check_response.status_code == 200:
+                            current_status = check_response.json().get('status')
+
+                            if current_status == 'Completed':
+                                print(f"  ✓ Previous refresh completed")
+                                break
+                            elif current_status == 'Failed':
+                                print(f"  ⚠️ Previous refresh failed, continuing with new refresh")
+                                break
+                            elif current_status == 'Cancelled':
+                                print(f"  ⚠️ Previous refresh was cancelled, continuing with new refresh")
+                                break
+
+                            if attempt % 6 == 0:
+                                print(f"  Still waiting... (status: {current_status})")
+                    else:
+                        print(f"  ⚠️ Timeout waiting for previous refresh, will attempt new refresh anyway")
+    except Exception as e:
+        print(f"  ⚠️ Could not check refresh status: {e}")
+        print(f"  Continuing with refresh attempt...")
+
     # Step 1: clearValues - Purge data from memory
     print("  Step 1: Clearing values from memory...")
     clearvalues_payload = {
@@ -158,9 +205,45 @@ def refresh_dataset(dataset_name, workspace_id, client, dataset_id=None):
     response = requests.post(powerbi_url, headers=headers, json=clearvalues_payload)
 
     if response.status_code in [200, 202]:
-
+        # For 202, monitor the clearValues operation
+        if response.status_code == 202:
+            location = response.headers.get('Location')
+            if location:
+                clear_refresh_id = location.split('/')[-1]
+                print("  ✓ Clear values initiated, monitoring progress...")
+
+                max_attempts = 60
+                for attempt in range(max_attempts):
+                    time.sleep(2)
+
+                    status_url = f"https://api.powerbi.com/v1.0/myorg/datasets/{dataset_id}/refreshes/{clear_refresh_id}"
+                    status_response = requests.get(status_url, headers=headers)
+                    status_response.raise_for_status()
+                    status = status_response.json().get('status')
+
+                    if status == 'Completed':
+                        print(f"  ✓ Clear values completed")
+                        break
+                    elif status == 'Failed':
+                        error = status_response.json().get('serviceExceptionJson', '')
+                        raise Exception(f"Clear values failed: {error}")
+                    elif status == 'Cancelled':
+                        raise Exception("Clear values was cancelled")
+
+                    if attempt % 10 == 0 and attempt > 0:
+                        print(f"  Clear values status: {status}...")
+                else:
+                    raise Exception(f"Clear values timed out")
+        else:
+            print("  ✓ Clear values completed")
     else:
-
+        # Provide detailed error message
+        try:
+            error_details = response.json()
+            error_message = error_details.get('error', {}).get('message', response.text)
+            raise Exception(f"Clear values failed with status {response.status_code}: {error_message}")
+        except (json.JSONDecodeError, ValueError):
+            response.raise_for_status()
 
     # Step 2: full refresh - Reframe data from Delta tables
     print("  Step 2: Full refresh to reframe data...")
@@ -175,7 +258,7 @@ def refresh_dataset(dataset_name, workspace_id, client, dataset_id=None):
     response = requests.post(powerbi_url, headers=headers, json=full_payload)
 
     if response.status_code in [200, 202]:
-        print(f"✓ Refresh initiated")
+        print(f"  ✓ Refresh initiated")
 
         # For 202, get the refresh_id from the Location header
         if response.status_code == 202:
@@ -207,7 +290,13 @@ def refresh_dataset(dataset_name, workspace_id, client, dataset_id=None):
 
             raise Exception(f"Refresh timed out")
     else:
-
+        # Provide detailed error message
+        try:
+            error_details = response.json()
+            error_message = error_details.get('error', {}).get('message', response.text)
+            raise Exception(f"Refresh request failed with status {response.status_code}: {error_message}")
+        except (json.JSONDecodeError, ValueError):
+            response.raise_for_status()
 
 
 def download_bim_from_github(url_or_path):
```
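The new guard added to `refresh_dataset()` is essentially a poll-until-terminal loop against the Power BI refresh history endpoint. A stripped-down, standalone sketch of that pattern; the bearer-token headers are assumed to come from elsewhere (duckrun obtains them through its `client`):

```python
import time
import requests

def wait_for_active_refresh(dataset_id: str, headers: dict,
                            poll_seconds: int = 5, max_attempts: int = 60) -> None:
    """Block until the most recent refresh of a dataset is no longer running.

    Mirrors the in-progress check above: read the latest refresh, and if it is
    'InProgress' or 'Unknown', poll its requestId until a terminal state.
    """
    url = f"https://api.powerbi.com/v1.0/myorg/datasets/{dataset_id}/refreshes"
    latest = requests.get(f"{url}?$top=1", headers=headers).json().get("value", [])
    if not latest or latest[0].get("status") not in ("InProgress", "Unknown"):
        return  # nothing running, safe to start a new refresh

    refresh_id = latest[0]["requestId"]
    for _ in range(max_attempts):
        time.sleep(poll_seconds)
        status = requests.get(f"{url}/{refresh_id}", headers=headers).json().get("status")
        if status in ("Completed", "Failed", "Cancelled"):
            return  # terminal state: proceed with the new refresh either way
```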
```diff
@@ -455,7 +544,7 @@ def create_dataset_from_bim(dataset_name, bim_content, workspace_id, client):
 
 
 def deploy_semantic_model(workspace_name_or_id, lakehouse_name_or_id, schema_name, dataset_name,
-                          bim_url_or_path, wait_seconds=5):
+                          bim_url_or_path, wait_seconds=5, refresh="full"):
     """
     Deploy a semantic model using DirectLake mode.
 
@@ -466,6 +555,9 @@ def deploy_semantic_model(workspace_name_or_id, lakehouse_name_or_id, schema_nam
         dataset_name: Name for the semantic model
         bim_url_or_path: URL to the BIM file or local file path (e.g., 'model.bim' or 'https://...')
         wait_seconds: Seconds to wait before refresh (default: 5)
+        refresh: Refresh strategy (default: "full")
+            - "full": Clear values and process full refresh
+            - "ignore": Skip refresh entirely
 
     Returns:
         1 for success, 0 for failure
@@ -478,6 +570,9 @@ def deploy_semantic_model(workspace_name_or_id, lakehouse_name_or_id, schema_nam
         # Using a local file
         dr.deploy("./my_model.bim")
         dr.deploy("C:/path/to/model.bim")
+
+        # Deploy without refresh
+        dr.deploy("./my_model.bim", refresh="ignore")
     """
     print("=" * 70)
     print("Semantic Model Deployment (DirectLake)")
@@ -502,7 +597,7 @@ def deploy_semantic_model(workspace_name_or_id, lakehouse_name_or_id, schema_nam
         time.sleep(wait_seconds)
 
         print("\n[Step 3/3] Refreshing existing semantic model...")
-        refresh_dataset(dataset_name, workspace_id, client)
+        refresh_dataset(dataset_name, workspace_id, client, refresh=refresh)
 
         print("\n" + "=" * 70)
         print("🎉 Refresh Completed!")
@@ -534,7 +629,7 @@ def deploy_semantic_model(workspace_name_or_id, lakehouse_name_or_id, schema_nam
 
     # Step 6: Refresh using the dataset ID returned from creation
     print("\n[Step 6/6] Refreshing semantic model...")
-    refresh_dataset(dataset_name, workspace_id, client, dataset_id=dataset_id)
+    refresh_dataset(dataset_name, workspace_id, client, dataset_id=dataset_id, refresh=refresh)
 
     print("\n" + "=" * 70)
     print("🎉 Deployment Completed!")
@@ -561,7 +656,7 @@ def deploy_semantic_model(workspace_name_or_id, lakehouse_name_or_id, schema_nam
         return 0
 
 
-def copy_model(ws_source, model_name, destination, new_model_name=None, wait_seconds=5):
+def copy_model(ws_source, model_name, destination, new_model_name=None, wait_seconds=5, refresh="full"):
     """
     Copy a semantic model from one workspace to another.
 
@@ -574,6 +669,9 @@ def copy_model(ws_source, model_name, destination, new_model_name=None, wait_sec
         destination: Destination in format "workspace/lakehouse.lakehouse/schema"
         new_model_name: Name for the new semantic model (default: same as source)
         wait_seconds: Seconds to wait before refresh (default: 5)
+        refresh: Refresh strategy (default: "full")
+            - "full": Clear values and process full refresh
+            - "ignore": Skip refresh entirely
 
     Returns:
         1 for success, 0 for failure
@@ -586,6 +684,9 @@ def copy_model(ws_source, model_name, destination, new_model_name=None, wait_sec
         copy_model("Source WS", "Production Model", "Target WS/Data Lake.lakehouse/analytics",
                    new_model_name="Production Model - Copy")
 
+        # Copy without refresh
+        copy_model("Source WS", "Model", "Target WS/LH.lakehouse/dbo", refresh="ignore")
+
         # Using the connect pattern
         import duckrun
         duckrun.semantic_model.copy_model("Source", "Model", "Target/LH.lakehouse/dbo")
@@ -712,7 +813,8 @@ def copy_model(ws_source, model_name, destination, new_model_name=None, wait_sec
         schema_name=schema,
         dataset_name=new_model_name,
         bim_url_or_path=temp_bim_path,
-        wait_seconds=wait_seconds
+        wait_seconds=wait_seconds,
+        refresh=refresh
     )
 
     # Clean up temp file
```
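`deploy_semantic_model()` and `copy_model()` now accept the same `refresh` switch and pass it down to `refresh_dataset()`. A short usage sketch built from the docstring examples above (workspace and model names are the docstring's placeholders):

```python
from duckrun.semantic_model import copy_model

# Default behaviour: copy, then run the two-step refresh (clearValues + full reframe)
# on the new target model.
copy_model("Source WS", "Production Model", "Target WS/Data Lake.lakehouse/analytics")

# New in this release: copy the model but skip the refresh step entirely.
copy_model("Source WS", "Production Model", "Target WS/Data Lake.lakehouse/analytics",
           refresh="ignore")
```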
duckrun/stats.py
CHANGED
```diff
@@ -60,7 +60,50 @@ def _get_existing_tables_in_schema(duckrun_instance, schema_name: str) -> list:
         return []
 
 
-def
+def _match_tables_by_pattern(duckrun_instance, pattern: str) -> dict:
+    """Match tables across all schemas using a wildcard pattern.
+    Pattern can be:
+    - '*.summary' - matches 'summary' table in all schemas
+    - '*summary' - matches any table ending with 'summary'
+    - 'schema.*' - matches all tables in 'schema'
+    Returns a dict mapping schema names to lists of matching table names."""
+    import fnmatch
+
+    try:
+        # Query all schemas and tables in one go
+        query = """
+            SELECT table_schema, table_name
+            FROM information_schema.tables
+            WHERE table_schema NOT LIKE 'pg_%'
+              AND table_schema != 'information_schema'
+              AND table_name NOT LIKE 'tbl_%'
+        """
+        result = duckrun_instance.con.execute(query).fetchall()
+
+        matched = {}
+
+        # Check if pattern contains a dot (schema.table pattern)
+        if '.' in pattern:
+            schema_pattern, table_pattern = pattern.split('.', 1)
+            for schema, table in result:
+                if fnmatch.fnmatch(schema, schema_pattern) and fnmatch.fnmatch(table, table_pattern):
+                    if schema not in matched:
+                        matched[schema] = []
+                    matched[schema].append(table)
+        else:
+            # Pattern matches only table names
+            for schema, table in result:
+                if fnmatch.fnmatch(table, pattern):
+                    if schema not in matched:
+                        matched[schema] = []
+                    matched[schema].append(table)
+
+        return matched
+    except:
+        return {}
+
+
+def get_stats(duckrun_instance, source: str = None, detailed = False):
     """
     Get comprehensive statistics for Delta Lake tables.
 
```
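The new `_match_tables_by_pattern()` helper leans entirely on `fnmatch`: a pattern with a dot is split once into a schema pattern and a table pattern, anything else is matched against table names only. A tiny standalone reproduction of that grouping rule (the sample schema/table names here are made up):

```python
import fnmatch

def match_tables(pattern, schema_table_pairs):
    """Group (schema, table) pairs by schema for names matching a wildcard pattern,
    following the same split rule as _match_tables_by_pattern above."""
    matched = {}
    for schema, table in schema_table_pairs:
        if "." in pattern:
            schema_pat, table_pat = pattern.split(".", 1)
            hit = fnmatch.fnmatch(schema, schema_pat) and fnmatch.fnmatch(table, table_pat)
        else:
            hit = fnmatch.fnmatch(table, pattern)
        if hit:
            matched.setdefault(schema, []).append(table)
    return matched

tables = [("aemo", "summary"), ("aemo", "price"), ("dbo", "daily_summary")]
print(match_tables("*.summary", tables))   # {'aemo': ['summary']}
print(match_tables("*summary", tables))    # {'aemo': ['summary'], 'dbo': ['daily_summary']}
```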
```diff
@@ -71,25 +114,35 @@ def get_stats(duckrun_instance, source: str = None):
             - Table name: 'table_name' (uses main schema in DuckDB)
             - Schema.table: 'schema.table_name' (specific table in schema, if multi-schema)
             - Schema only: 'schema' (all tables in schema, if multi-schema)
+            - Wildcard pattern: '*.summary' (matches tables across all schemas)
+        detailed: Optional. Controls the level of detail in statistics:
+            - False (default): Aggregated table-level stats (total rows, file count,
+              row groups, average row group size, file sizes, VORDER status)
+            - True: Row group level statistics with compression details, row group sizes,
+              and parquet metadata
 
     Returns:
-        … (2 removed lines not shown in this rendering)
+        DataFrame with statistics based on detailed parameter:
+        - If detailed=False: Aggregated table-level summary
+        - If detailed=True: Granular file and row group level stats
 
     Examples:
         con = duckrun.connect("tmp/data.lakehouse/test")
 
-        # All tables in the connection's schema
+        # All tables in the connection's schema (aggregated)
         stats = con.get_stats()
 
-        # Single table
-
+        # Single table with detailed row group statistics
+        stats_detailed = con.get_stats('price_today', detailed=True)
 
         # Specific table in different schema (only if multi-schema enabled)
         stats = con.get_stats('aemo.price')
 
         # All tables in a schema (only if multi-schema enabled)
         stats = con.get_stats('aemo')
+
+        # Wildcard pattern across all schemas (only if multi-schema enabled)
+        stats = con.get_stats('*.summary')
     """
     timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
 
@@ -101,8 +154,27 @@ def get_stats(duckrun_instance, source: str = None):
     if source is None:
         source = url_schema
 
+    # Check if source contains wildcard characters
+    if '*' in source or '?' in source:
+        # Wildcard pattern mode - only valid if multi-schema is enabled
+        if not duckrun_instance.scan_all_schemas:
+            raise ValueError(f"Wildcard pattern '{source}' not supported. Connection was made to a specific schema '{url_schema}'. Enable multi-schema mode to use wildcards.")
+
+        matched_tables = _match_tables_by_pattern(duckrun_instance, source)
+
+        if not matched_tables:
+            raise ValueError(f"No tables found matching pattern '{source}'")
+
+        # Flatten the matched tables into a list with schema info
+        tables_with_schemas = []
+        for schema, tables in matched_tables.items():
+            for table in tables:
+                tables_with_schemas.append((schema, table))
+
+        print(f"Found {len(tables_with_schemas)} tables matching pattern '{source}'")
+
     # Parse the source and validate existence
-
+    elif '.' in source:
         # Format: schema.table - only valid if multi-schema is enabled
         schema_name, table_name = source.split('.', 1)
 
@@ -113,46 +185,45 @@ def get_stats(duckrun_instance, source: str = None):
         if not _table_exists(duckrun_instance, schema_name, table_name):
             raise ValueError(f"Table '{table_name}' does not exist in schema '{schema_name}'")
 
-
+        tables_with_schemas = [(schema_name, table_name)]
     else:
         # Could be just table name or schema name
         if duckrun_instance.scan_all_schemas:
             # Multi-schema mode: DuckDB has actual schemas
             # First check if it's a table in main schema
             if _table_exists(duckrun_instance, duckdb_schema, source):
-
-                schema_name = duckdb_schema
+                tables_with_schemas = [(duckdb_schema, source)]
             # Otherwise, check if it's a schema name
             elif _schema_exists(duckrun_instance, source):
                 schema_name = source
                 list_tables = _get_existing_tables_in_schema(duckrun_instance, source)
                 if not list_tables:
                     raise ValueError(f"Schema '{source}' exists but contains no tables")
+                tables_with_schemas = [(schema_name, tbl) for tbl in list_tables]
             else:
                 raise ValueError(f"Neither table '{source}' in main schema nor schema '{source}' exists")
         else:
             # Single-schema mode: tables are in DuckDB's main schema, use URL schema for file paths
             if _table_exists(duckrun_instance, duckdb_schema, source):
                 # It's a table name
-
-                schema_name = url_schema  # Use URL schema for file path construction
+                tables_with_schemas = [(url_schema, source)]
             elif source == url_schema:
                 # Special case: user asked for stats on the URL schema name - list all tables
                 list_tables = _get_existing_tables_in_schema(duckrun_instance, duckdb_schema)
-                schema_name = url_schema  # Use URL schema for file path construction
                 if not list_tables:
                     raise ValueError(f"No tables found in schema '{url_schema}'")
+                tables_with_schemas = [(url_schema, tbl) for tbl in list_tables]
             else:
                 raise ValueError(f"Table '{source}' does not exist in the current context (schema: {url_schema})")
 
     # Use the existing connection
     con = duckrun_instance.con
 
-    print(f"Processing {len(
+    print(f"Processing {len(tables_with_schemas)} tables from {len(set(s for s, t in tables_with_schemas))} schema(s)")
 
     successful_tables = []
-    for idx, tbl in enumerate(
-        print(f"[{idx+1}/{len(
+    for idx, (schema_name, tbl) in enumerate(tables_with_schemas):
+        print(f"[{idx+1}/{len(tables_with_schemas)}] Processing table '{schema_name}.{tbl}'...")
         # Construct lakehouse path using correct ABFSS URL format (no .Lakehouse suffix)
         table_path = f"{duckrun_instance.table_base_url}{schema_name}/{tbl}"
 
@@ -179,8 +250,18 @@ def get_stats(duckrun_instance, source: str = None):
             print(f"Warning: Could not convert RecordBatch for table '{tbl}': Unexpected type {type(add_actions)}")
                 xx = {}
 
-            # Check if VORDER exists
-
+            # Check if VORDER exists - handle both formats:
+            # 1. Flattened format: 'tags.VORDER' or 'tags.vorder' in keys
+            # 2. Nested format: check in 'tags' dict for 'VORDER' or 'vorder'
+            vorder = False
+            if 'tags.VORDER' in xx.keys() or 'tags.vorder' in xx.keys():
+                vorder = True
+            elif 'tags' in xx.keys() and xx['tags']:
+                # Check nested tags dictionary (tags is a list of dicts, one per file)
+                for tag_dict in xx['tags']:
+                    if tag_dict and ('VORDER' in tag_dict or 'vorder' in tag_dict):
+                        vorder = True
+                        break
 
             # Calculate total size
             total_size = sum(xx['size_bytes']) if xx['size_bytes'] else 0
@@ -195,6 +276,7 @@ def get_stats(duckrun_instance, source: str = None):
                 con.execute(f'''
                     CREATE OR REPLACE TEMP TABLE tbl_{idx} AS
                     SELECT
+                        '{schema_name}' as schema,
                         '{tbl}' as tbl,
                         'empty' as file_name,
                         0 as num_rows,
@@ -207,21 +289,45 @@ def get_stats(duckrun_instance, source: str = None):
                 ''')
             else:
                 # Get parquet metadata and create temp table with compression info
-                … (15 removed lines not shown in this rendering)
+                if detailed == True:
+                    # Detailed mode: Include row group level statistics
+                    con.execute(f'''
+                        CREATE OR REPLACE TEMP TABLE tbl_{idx} AS
+                        SELECT
+                            '{schema_name}' as schema,
+                            '{tbl}' as tbl,
+                            pm.file_name,
+                            pm.row_group_id,
+                            pm.row_group_num_rows,
+                            pm.row_group_num_columns,
+                            pm.row_group_bytes,
+                            {vorder} as vorder,
+                            pm.compression,
+                            pm.total_compressed_size,
+                            pm.total_uncompressed_size,
+                            ROUND(pm.total_compressed_size::DOUBLE / NULLIF(pm.total_uncompressed_size, 0), 4) as compression_ratio,
+                            '{timestamp}' as timestamp
+                        FROM parquet_metadata({delta}) pm
+                        WHERE pm.column_id = 0  -- Only include first column to avoid duplication per column
+                    ''')
+                else:
+                    # Aggregated mode: Original summary statistics
+                    con.execute(f'''
+                        CREATE OR REPLACE TEMP TABLE tbl_{idx} AS
+                        SELECT
+                            '{schema_name}' as schema,
+                            '{tbl}' as tbl,
+                            fm.file_name,
+                            fm.num_rows,
+                            fm.num_row_groups,
+                            CEIL({total_size}/(1024*1024)) as size,
+                            {vorder} as vorder,
+                            COALESCE(STRING_AGG(DISTINCT pm.compression, ', ' ORDER BY pm.compression), 'UNCOMPRESSED') as compression,
+                            '{timestamp}' as timestamp
+                        FROM parquet_file_metadata({delta}) fm
+                        LEFT JOIN parquet_metadata({delta}) pm ON fm.file_name = pm.file_name
+                        GROUP BY fm.file_name, fm.num_rows, fm.num_row_groups
+                    ''')
 
         except Exception as e:
             error_msg = str(e)
@@ -245,6 +351,7 @@ def get_stats(duckrun_instance, source: str = None):
                 con.execute(f'''
                     CREATE OR REPLACE TEMP TABLE tbl_{idx} AS
                     SELECT
+                        '{schema_name}' as schema,
                         '{tbl}' as tbl,
                         'empty' as file_name,
                         0 as num_rows,
@@ -269,21 +376,45 @@ def get_stats(duckrun_instance, source: str = None):
                     filenames.append(table_path + "/" + filename)
 
                 # Use parquet_file_metadata to get actual parquet stats with compression
-                … (15 removed lines not shown in this rendering)
+                if detailed == True:
+                    # Detailed mode: Include row group level statistics
+                    con.execute(f'''
+                        CREATE OR REPLACE TEMP TABLE tbl_{idx} AS
+                        SELECT
+                            '{schema_name}' as schema,
+                            '{tbl}' as tbl,
+                            pm.file_name,
+                            pm.row_group_id,
+                            pm.row_group_num_rows,
+                            pm.row_group_num_columns,
+                            pm.row_group_bytes,
+                            false as vorder,
+                            pm.compression,
+                            pm.total_compressed_size,
+                            pm.total_uncompressed_size,
+                            ROUND(pm.total_compressed_size::DOUBLE / NULLIF(pm.total_uncompressed_size, 0), 4) as compression_ratio,
+                            '{timestamp}' as timestamp
+                        FROM parquet_metadata({filenames}) pm
+                        WHERE pm.column_id = 0  -- Only include first column to avoid duplication per column
+                    ''')
+                else:
+                    # Aggregated mode: Original summary statistics
+                    con.execute(f'''
+                        CREATE OR REPLACE TEMP TABLE tbl_{idx} AS
+                        SELECT
+                            '{schema_name}' as schema,
+                            '{tbl}' as tbl,
+                            fm.file_name,
+                            fm.num_rows,
+                            fm.num_row_groups,
+                            0 as size,
+                            false as vorder,
+                            COALESCE(STRING_AGG(DISTINCT pm.compression, ', ' ORDER BY pm.compression), 'UNCOMPRESSED') as compression,
+                            '{timestamp}' as timestamp
+                        FROM parquet_file_metadata({filenames}) fm
+                        LEFT JOIN parquet_metadata({filenames}) pm ON fm.file_name = pm.file_name
+                        GROUP BY fm.file_name, fm.num_rows, fm.num_row_groups
+                    ''')
 
                 print(f"  ✓ Successfully processed '{tbl}' using DuckDB fallback with parquet metadata")
             except Exception as fallback_error:
@@ -299,30 +430,59 @@ def get_stats(duckrun_instance, source: str = None):
         # No tables were processed successfully - return empty dataframe
         print("⚠️ No tables could be processed successfully")
         import pandas as pd
-        … (2 removed lines not shown in this rendering)
+        if detailed == True:
+            return pd.DataFrame(columns=['schema', 'tbl', 'file_name', 'row_group_id', 'row_group_num_rows',
+                                         'row_group_num_columns', 'row_group_bytes', 'vorder', 'compression',
+                                         'total_compressed_size', 'total_uncompressed_size', 'compression_ratio', 'timestamp'])
+        else:
+            return pd.DataFrame(columns=['schema', 'tbl', 'total_rows', 'num_files', 'num_row_group',
+                                         'average_row_group', 'file_size_MB', 'vorder', 'compression', 'timestamp'])
 
     # Union all successfully processed temp tables
     union_parts = [f'SELECT * FROM tbl_{i}' for i in successful_tables]
     union_query = ' UNION ALL '.join(union_parts)
 
-    # Generate final summary
-    … (16 removed lines not shown in this rendering)
+    # Generate final summary based on detailed flag
+    if detailed == True:
+        # Detailed mode: Return row group level data without aggregation
+        final_result = con.execute(f'''
+            SELECT
+                schema,
+                tbl,
+                file_name,
+                row_group_id,
+                row_group_num_rows,
+                row_group_num_columns,
+                row_group_bytes,
+                vorder,
+                compression,
+                total_compressed_size,
+                total_uncompressed_size,
+                compression_ratio,
+                timestamp
+            FROM ({union_query})
+            WHERE tbl IS NOT NULL
+            ORDER BY schema, tbl, file_name, row_group_id
+        ''').df()
+    else:
+        # Aggregated mode: Original summary statistics
+        final_result = con.execute(f'''
+            SELECT
+                schema,
+                tbl,
+                SUM(num_rows) as total_rows,
+                COUNT(*) as num_files,
+                SUM(num_row_groups) as num_row_group,
+                CAST(CEIL(SUM(num_rows)::DOUBLE / NULLIF(SUM(num_row_groups), 0)) AS INTEGER) as average_row_group,
+                MIN(size) as file_size_MB,
+                ANY_VALUE(vorder) as vorder,
+                STRING_AGG(DISTINCT compression, ', ' ORDER BY compression) as compression,
+                ANY_VALUE(timestamp) as timestamp
+            FROM ({union_query})
+            WHERE tbl IS NOT NULL
+            GROUP BY schema, tbl
+            ORDER BY total_rows DESC
+        ''').df()
 
     return final_result
 
```
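From the caller's side, the stats.py changes add two knobs that the docstring above documents: the `detailed` flag and wildcard sources. A minimal sketch, reusing the docstring's illustrative connection string and assuming multi-schema mode is enabled for the wildcard call:

```python
import duckrun

con = duckrun.connect("tmp/data.lakehouse/test")

# Aggregated, table-level summary (default) for every table in the schema.
print(con.get_stats())

# Per-row-group detail for one table, including compression ratios.
print(con.get_stats("price_today", detailed=True))

# Wildcard across schemas; raises ValueError unless multi-schema mode is enabled.
print(con.get_stats("*.summary"))
```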
duckrun-0.2.19.dev0.dist-info/RECORD
ADDED
```diff
@@ -0,0 +1,15 @@
+duckrun/__init__.py,sha256=-DPOb_ETaBC0M7YqXj482FE1aZ-SxJeSeY6KB6hPgWU,350
+duckrun/auth.py,sha256=EMaf-L2zeNOjbHOT97xYxfZNfWo4WrwrU1h3vBQTgEc,9624
+duckrun/core.py,sha256=jpg1okp6-Y4HubTJmSjyT9uhUc5pFr4A0tcNxNujSig,69086
+duckrun/files.py,sha256=Fvdjg3DyHJzIVzKo8M_j-eGz4zU61lOB38Y_onbQJkI,10137
+duckrun/lakehouse.py,sha256=j--Z3zo8AOWt1GF9VzRosmmTAy6ey2D0LVubti58twU,14109
+duckrun/notebook.py,sha256=lzDRBoWZ_lePF-_5BbA1_42BImLZC5yrq6nzlmlKglM,12183
+duckrun/runner.py,sha256=NGVyerJA44UP2umRdndfL0fuFM_gdOZmuJUz-PLOFf0,13461
+duckrun/semantic_model.py,sha256=shRPBN1II60K_PH8JOqke-_3hAwLspcx4Add0VJRwwU,35913
+duckrun/stats.py,sha256=HyzfDUGvYIxJ9QM8gbT_ISmVrVeEhhbxpxg1VLAgaRQ,23862
+duckrun/writer.py,sha256=wIsU77DSj4J7d9_bIhvk6AbC51uUrLW0e6pcSPQOY1c,9424
+duckrun-0.2.19.dev0.dist-info/licenses/LICENSE,sha256=-DeQQwdbCbkB4507ZF3QbocysB-EIjDtaLexvqRkGZc,1083
+duckrun-0.2.19.dev0.dist-info/METADATA,sha256=I2EXHQLP-Gr_O2Y3yYiAb7el4OTeuutB5P-SvisnO4g,20807
+duckrun-0.2.19.dev0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+duckrun-0.2.19.dev0.dist-info/top_level.txt,sha256=BknMEwebbUHrVAp3SC92ps8MPhK7XSYsaogTvi_DmEU,8
+duckrun-0.2.19.dev0.dist-info/RECORD,,
```
duckrun-0.2.18.dev2.dist-info/RECORD
DELETED
```diff
@@ -1,15 +0,0 @@
-duckrun/__init__.py,sha256=vqv_bJjHjrrXGs8Zyxuy-GKTCyJlZ5z3npPQgE9ipBY,355
-duckrun/auth.py,sha256=EMaf-L2zeNOjbHOT97xYxfZNfWo4WrwrU1h3vBQTgEc,9624
-duckrun/core.py,sha256=tWLFOSVZHoJ0r5YJaj0lG1s_kehiIrnxPMrQQIcyh94,68367
-duckrun/files.py,sha256=Fvdjg3DyHJzIVzKo8M_j-eGz4zU61lOB38Y_onbQJkI,10137
-duckrun/lakehouse.py,sha256=j--Z3zo8AOWt1GF9VzRosmmTAy6ey2D0LVubti58twU,14109
-duckrun/notebook.py,sha256=SzdKTpvzHiWMrvg7mCd3DN6R4gU_6Gm7gfkuETzylaE,12103
-duckrun/runner.py,sha256=NGVyerJA44UP2umRdndfL0fuFM_gdOZmuJUz-PLOFf0,13461
-duckrun/semantic_model.py,sha256=X3VKdo4BehAg681Ucq7fzB2KPY2mwPLbfIZqI5Gbqp4,30377
-duckrun/stats.py,sha256=qvWnPk2P8Ob_tzaiNfdQmUQqMVq2FWv3EgArE7hPl44,15482
-duckrun/writer.py,sha256=wIsU77DSj4J7d9_bIhvk6AbC51uUrLW0e6pcSPQOY1c,9424
-duckrun-0.2.18.dev2.dist-info/licenses/LICENSE,sha256=-DeQQwdbCbkB4507ZF3QbocysB-EIjDtaLexvqRkGZc,1083
-duckrun-0.2.18.dev2.dist-info/METADATA,sha256=JpewTO7QqHrdUn_G3Lz-1jxFifVyBxj9lNX_Qodhe2A,20807
-duckrun-0.2.18.dev2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-duckrun-0.2.18.dev2.dist-info/top_level.txt,sha256=BknMEwebbUHrVAp3SC92ps8MPhK7XSYsaogTvi_DmEU,8
-duckrun-0.2.18.dev2.dist-info/RECORD,,
```
{duckrun-0.2.18.dev2.dist-info → duckrun-0.2.19.dev0.dist-info}/WHEEL
File without changes
{duckrun-0.2.18.dev2.dist-info → duckrun-0.2.19.dev0.dist-info}/licenses/LICENSE
File without changes
{duckrun-0.2.18.dev2.dist-info → duckrun-0.2.19.dev0.dist-info}/top_level.txt
File without changes