duckrun 0.2.19.dev0__tar.gz → 0.2.19.dev1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {duckrun-0.2.19.dev0 → duckrun-0.2.19.dev1}/PKG-INFO +1 -1
- {duckrun-0.2.19.dev0 → duckrun-0.2.19.dev1}/duckrun/stats.py +8 -41
- {duckrun-0.2.19.dev0 → duckrun-0.2.19.dev1}/duckrun.egg-info/PKG-INFO +1 -1
- {duckrun-0.2.19.dev0 → duckrun-0.2.19.dev1}/pyproject.toml +1 -1
- {duckrun-0.2.19.dev0 → duckrun-0.2.19.dev1}/LICENSE +0 -0
- {duckrun-0.2.19.dev0 → duckrun-0.2.19.dev1}/README.md +0 -0
- {duckrun-0.2.19.dev0 → duckrun-0.2.19.dev1}/duckrun/__init__.py +0 -0
- {duckrun-0.2.19.dev0 → duckrun-0.2.19.dev1}/duckrun/auth.py +0 -0
- {duckrun-0.2.19.dev0 → duckrun-0.2.19.dev1}/duckrun/core.py +0 -0
- {duckrun-0.2.19.dev0 → duckrun-0.2.19.dev1}/duckrun/files.py +0 -0
- {duckrun-0.2.19.dev0 → duckrun-0.2.19.dev1}/duckrun/lakehouse.py +0 -0
- {duckrun-0.2.19.dev0 → duckrun-0.2.19.dev1}/duckrun/notebook.py +0 -0
- {duckrun-0.2.19.dev0 → duckrun-0.2.19.dev1}/duckrun/runner.py +0 -0
- {duckrun-0.2.19.dev0 → duckrun-0.2.19.dev1}/duckrun/semantic_model.py +0 -0
- {duckrun-0.2.19.dev0 → duckrun-0.2.19.dev1}/duckrun/writer.py +0 -0
- {duckrun-0.2.19.dev0 → duckrun-0.2.19.dev1}/duckrun.egg-info/SOURCES.txt +0 -0
- {duckrun-0.2.19.dev0 → duckrun-0.2.19.dev1}/duckrun.egg-info/dependency_links.txt +0 -0
- {duckrun-0.2.19.dev0 → duckrun-0.2.19.dev1}/duckrun.egg-info/requires.txt +0 -0
- {duckrun-0.2.19.dev0 → duckrun-0.2.19.dev1}/duckrun.egg-info/top_level.txt +0 -0
- {duckrun-0.2.19.dev0 → duckrun-0.2.19.dev1}/setup.cfg +0 -0
|
@@ -290,25 +290,16 @@ def get_stats(duckrun_instance, source: str = None, detailed = False):
|
|
|
290
290
|
else:
|
|
291
291
|
# Get parquet metadata and create temp table with compression info
|
|
292
292
|
if detailed == True:
|
|
293
|
-
# Detailed mode: Include
|
|
293
|
+
# Detailed mode: Include ALL parquet_metadata columns
|
|
294
294
|
con.execute(f'''
|
|
295
295
|
CREATE OR REPLACE TEMP TABLE tbl_{idx} AS
|
|
296
296
|
SELECT
|
|
297
297
|
'{schema_name}' as schema,
|
|
298
298
|
'{tbl}' as tbl,
|
|
299
|
-
pm.file_name,
|
|
300
|
-
pm.row_group_id,
|
|
301
|
-
pm.row_group_num_rows,
|
|
302
|
-
pm.row_group_num_columns,
|
|
303
|
-
pm.row_group_bytes,
|
|
304
299
|
{vorder} as vorder,
|
|
305
|
-
pm.compression,
|
|
306
|
-
pm.total_compressed_size,
|
|
307
|
-
pm.total_uncompressed_size,
|
|
308
|
-
ROUND(pm.total_compressed_size::DOUBLE / NULLIF(pm.total_uncompressed_size, 0), 4) as compression_ratio,
|
|
300
|
+
pm.*,
|
|
309
301
|
'{timestamp}' as timestamp
|
|
310
302
|
FROM parquet_metadata({delta}) pm
|
|
311
|
-
WHERE pm.column_id = 0 -- Only include first column to avoid duplication per column
|
|
312
303
|
''')
|
|
313
304
|
else:
|
|
314
305
|
# Aggregated mode: Original summary statistics
|
|
@@ -377,25 +368,16 @@ def get_stats(duckrun_instance, source: str = None, detailed = False):
|
|
|
377
368
|
|
|
378
369
|
# Use parquet_file_metadata to get actual parquet stats with compression
|
|
379
370
|
if detailed == True:
|
|
380
|
-
# Detailed mode: Include
|
|
371
|
+
# Detailed mode: Include ALL parquet_metadata columns
|
|
381
372
|
con.execute(f'''
|
|
382
373
|
CREATE OR REPLACE TEMP TABLE tbl_{idx} AS
|
|
383
374
|
SELECT
|
|
384
375
|
'{schema_name}' as schema,
|
|
385
376
|
'{tbl}' as tbl,
|
|
386
|
-
pm.file_name,
|
|
387
|
-
pm.row_group_id,
|
|
388
|
-
pm.row_group_num_rows,
|
|
389
|
-
pm.row_group_num_columns,
|
|
390
|
-
pm.row_group_bytes,
|
|
391
377
|
false as vorder,
|
|
392
|
-
pm.compression,
|
|
393
|
-
pm.total_compressed_size,
|
|
394
|
-
pm.total_uncompressed_size,
|
|
395
|
-
ROUND(pm.total_compressed_size::DOUBLE / NULLIF(pm.total_uncompressed_size, 0), 4) as compression_ratio,
|
|
378
|
+
pm.*,
|
|
396
379
|
'{timestamp}' as timestamp
|
|
397
380
|
FROM parquet_metadata({filenames}) pm
|
|
398
|
-
WHERE pm.column_id = 0 -- Only include first column to avoid duplication per column
|
|
399
381
|
''')
|
|
400
382
|
else:
|
|
401
383
|
# Aggregated mode: Original summary statistics
|
|
@@ -431,9 +413,7 @@ def get_stats(duckrun_instance, source: str = None, detailed = False):
|
|
|
431
413
|
print("⚠️ No tables could be processed successfully")
|
|
432
414
|
import pandas as pd
|
|
433
415
|
if detailed == True:
|
|
434
|
-
return pd.DataFrame(columns=['schema', 'tbl', 'file_name', 'row_group_id', 'row_group_num_rows',
|
|
435
|
-
'row_group_num_columns', 'row_group_bytes', 'vorder', 'compression',
|
|
436
|
-
'total_compressed_size', 'total_uncompressed_size', 'compression_ratio', 'timestamp'])
|
|
416
|
+
return pd.DataFrame(columns=['schema', 'tbl', 'vorder', 'timestamp'])
|
|
437
417
|
else:
|
|
438
418
|
return pd.DataFrame(columns=['schema', 'tbl', 'total_rows', 'num_files', 'num_row_group',
|
|
439
419
|
'average_row_group', 'file_size_MB', 'vorder', 'compression', 'timestamp'])
|
|
@@ -444,25 +424,12 @@ def get_stats(duckrun_instance, source: str = None, detailed = False):
|
|
|
444
424
|
|
|
445
425
|
# Generate final summary based on detailed flag
|
|
446
426
|
if detailed == True:
|
|
447
|
-
# Detailed mode: Return
|
|
427
|
+
# Detailed mode: Return ALL parquet_metadata columns
|
|
448
428
|
final_result = con.execute(f'''
|
|
449
|
-
SELECT
|
|
450
|
-
schema,
|
|
451
|
-
tbl,
|
|
452
|
-
file_name,
|
|
453
|
-
row_group_id,
|
|
454
|
-
row_group_num_rows,
|
|
455
|
-
row_group_num_columns,
|
|
456
|
-
row_group_bytes,
|
|
457
|
-
vorder,
|
|
458
|
-
compression,
|
|
459
|
-
total_compressed_size,
|
|
460
|
-
total_uncompressed_size,
|
|
461
|
-
compression_ratio,
|
|
462
|
-
timestamp
|
|
429
|
+
SELECT *
|
|
463
430
|
FROM ({union_query})
|
|
464
431
|
WHERE tbl IS NOT NULL
|
|
465
|
-
ORDER BY schema, tbl, file_name, row_group_id
|
|
432
|
+
ORDER BY schema, tbl, file_name, row_group_id, column_id
|
|
466
433
|
''').df()
|
|
467
434
|
else:
|
|
468
435
|
# Aggregated mode: Original summary statistics
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "duckrun"
|
|
7
|
-
version = "0.2.19.dev0"
|
|
7
|
+
version = "0.2.19.dev1"
|
|
8
8
|
description = "Helper library for Fabric Python using duckdb, arrow and delta_rs (orchestration, queries, etc.)"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = {text = "MIT"}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|