duckrun 0.1.7__tar.gz → 0.1.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {duckrun-0.1.7 → duckrun-0.1.9}/PKG-INFO +2 -2
- {duckrun-0.1.7 → duckrun-0.1.9}/duckrun/core.py +22 -7
- {duckrun-0.1.7 → duckrun-0.1.9}/duckrun.egg-info/PKG-INFO +2 -2
- {duckrun-0.1.7 → duckrun-0.1.9}/duckrun.egg-info/requires.txt +1 -1
- {duckrun-0.1.7 → duckrun-0.1.9}/pyproject.toml +2 -2
- {duckrun-0.1.7 → duckrun-0.1.9}/LICENSE +0 -0
- {duckrun-0.1.7 → duckrun-0.1.9}/README.md +0 -0
- {duckrun-0.1.7 → duckrun-0.1.9}/duckrun/__init__.py +0 -0
- {duckrun-0.1.7 → duckrun-0.1.9}/duckrun.egg-info/SOURCES.txt +0 -0
- {duckrun-0.1.7 → duckrun-0.1.9}/duckrun.egg-info/dependency_links.txt +0 -0
- {duckrun-0.1.7 → duckrun-0.1.9}/duckrun.egg-info/top_level.txt +0 -0
- {duckrun-0.1.7 → duckrun-0.1.9}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: duckrun
|
3
|
-
Version: 0.1.7
|
3
|
+
Version: 0.1.9
|
4
4
|
Summary: Lakehouse task runner powered by DuckDB for Microsoft Fabric
|
5
5
|
Author: mim
|
6
6
|
License: MIT
|
@@ -11,7 +11,7 @@ Requires-Python: >=3.9
|
|
11
11
|
Description-Content-Type: text/markdown
|
12
12
|
License-File: LICENSE
|
13
13
|
Requires-Dist: duckdb>=1.2.0
|
14
|
-
Requires-Dist: deltalake
|
14
|
+
Requires-Dist: deltalake<=0.18.2
|
15
15
|
Requires-Dist: requests>=2.28.0
|
16
16
|
Requires-Dist: obstore>=0.2.0
|
17
17
|
Provides-Extra: local
|
@@ -8,6 +8,9 @@ from string import Template
|
|
8
8
|
import obstore as obs
|
9
9
|
from obstore.store import AzureStore
|
10
10
|
|
11
|
+
# Row Group configuration for optimal Delta Lake performance
|
12
|
+
RG = 8_000_000
|
13
|
+
|
11
14
|
|
12
15
|
class DeltaWriter:
|
13
16
|
"""Spark-style write API for Delta Lake"""
|
@@ -48,7 +51,7 @@ class DeltaWriter:
|
|
48
51
|
df = self.relation.record_batch()
|
49
52
|
|
50
53
|
print(f"Writing to Delta table: {schema}.{table} (mode={self._mode})")
|
51
|
-
write_deltalake(path, df, mode=self._mode)
|
54
|
+
write_deltalake(path, df, mode=self._mode, max_rows_per_file=RG, max_rows_per_group=RG, min_rows_per_group=RG)
|
52
55
|
|
53
56
|
self.duckrun.con.sql(f"DROP VIEW IF EXISTS {table}")
|
54
57
|
self.duckrun.con.sql(f"""
|
@@ -406,7 +409,7 @@ class Duckrun:
|
|
406
409
|
if mode == 'overwrite':
|
407
410
|
self.con.sql(f"DROP VIEW IF EXISTS {normalized_table}")
|
408
411
|
df = self.con.sql(sql).record_batch()
|
409
|
-
write_deltalake(path, df, mode='overwrite')
|
412
|
+
write_deltalake(path, df, mode='overwrite', max_rows_per_file=RG, max_rows_per_group=RG, min_rows_per_group=RG)
|
410
413
|
self.con.sql(f"CREATE OR REPLACE VIEW {normalized_table} AS SELECT * FROM delta_scan('{path}')")
|
411
414
|
dt = DeltaTable(path)
|
412
415
|
dt.vacuum(retention_hours=0, dry_run=False, enforce_retention_duration=False)
|
@@ -414,7 +417,7 @@ class Duckrun:
|
|
414
417
|
|
415
418
|
elif mode == 'append':
|
416
419
|
df = self.con.sql(sql).record_batch()
|
417
|
-
write_deltalake(path, df, mode='append')
|
420
|
+
write_deltalake(path, df, mode='append', max_rows_per_file=RG, max_rows_per_group=RG, min_rows_per_group=RG)
|
418
421
|
self.con.sql(f"CREATE OR REPLACE VIEW {normalized_table} AS SELECT * FROM delta_scan('{path}')")
|
419
422
|
dt = DeltaTable(path)
|
420
423
|
if len(dt.file_uris()) > self.compaction_threshold:
|
@@ -431,7 +434,7 @@ class Duckrun:
|
|
431
434
|
print(f"Table {normalized_table} doesn't exist. Creating...")
|
432
435
|
self.con.sql(f"DROP VIEW IF EXISTS {normalized_table}")
|
433
436
|
df = self.con.sql(sql).record_batch()
|
434
|
-
write_deltalake(path, df, mode='overwrite')
|
437
|
+
write_deltalake(path, df, mode='overwrite', max_rows_per_file=RG, max_rows_per_group=RG, min_rows_per_group=RG)
|
435
438
|
self.con.sql(f"CREATE OR REPLACE VIEW {normalized_table} AS SELECT * FROM delta_scan('{path}')")
|
436
439
|
dt = DeltaTable(path)
|
437
440
|
dt.vacuum(dry_run=False)
|
@@ -450,6 +453,7 @@ class Duckrun:
|
|
450
453
|
|
451
454
|
Returns:
|
452
455
|
True if all tasks succeeded
|
456
|
+
False if any task failed (exception) or Python task returned 0 (early exit)
|
453
457
|
"""
|
454
458
|
if self.sql_folder is None:
|
455
459
|
raise RuntimeError("sql_folder is not configured. Cannot run pipelines.")
|
@@ -460,22 +464,33 @@ class Duckrun:
|
|
460
464
|
print('='*60)
|
461
465
|
|
462
466
|
try:
|
467
|
+
result = None
|
468
|
+
|
463
469
|
if len(task) == 2:
|
464
470
|
name, second = task
|
465
471
|
if isinstance(second, str) and second in {'overwrite', 'append', 'ignore'}:
|
466
|
-
self._run_sql(name, second, {})
|
472
|
+
result = self._run_sql(name, second, {})
|
467
473
|
else:
|
468
474
|
args = second if isinstance(second, (tuple, list)) else (second,)
|
469
|
-
self._run_python(name, tuple(args))
|
475
|
+
result = self._run_python(name, tuple(args))
|
470
476
|
|
471
477
|
elif len(task) == 3:
|
472
478
|
table, mode, params = task
|
473
479
|
if not isinstance(params, dict):
|
474
480
|
raise ValueError(f"Expected dict for params, got {type(params)}")
|
475
|
-
self._run_sql(table, mode, params)
|
481
|
+
result = self._run_sql(table, mode, params)
|
476
482
|
|
477
483
|
else:
|
478
484
|
raise ValueError(f"Invalid task format: {task}")
|
485
|
+
|
486
|
+
# Check if Python task returned 0 (early exit condition)
|
487
|
+
# Only check for Python tasks as SQL tasks return table names (strings) and only stop on exceptions
|
488
|
+
if (len(task) == 2 and
|
489
|
+
not isinstance(task[1], str) and
|
490
|
+
result == 0):
|
491
|
+
print(f"\n⏹️ Python task {i} returned 0 - stopping pipeline execution")
|
492
|
+
print(f" Remaining tasks ({len(pipeline) - i}) will not be executed")
|
493
|
+
return False
|
479
494
|
|
480
495
|
except Exception as e:
|
481
496
|
print(f"\n❌ Task {i} failed: {e}")
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: duckrun
|
3
|
-
Version: 0.1.7
|
3
|
+
Version: 0.1.9
|
4
4
|
Summary: Lakehouse task runner powered by DuckDB for Microsoft Fabric
|
5
5
|
Author: mim
|
6
6
|
License: MIT
|
@@ -11,7 +11,7 @@ Requires-Python: >=3.9
|
|
11
11
|
Description-Content-Type: text/markdown
|
12
12
|
License-File: LICENSE
|
13
13
|
Requires-Dist: duckdb>=1.2.0
|
14
|
-
Requires-Dist: deltalake
|
14
|
+
Requires-Dist: deltalake<=0.18.2
|
15
15
|
Requires-Dist: requests>=2.28.0
|
16
16
|
Requires-Dist: obstore>=0.2.0
|
17
17
|
Provides-Extra: local
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
4
4
|
|
5
5
|
[project]
|
6
6
|
name = "duckrun"
|
7
|
-
version = "0.1.7"
|
7
|
+
version = "0.1.9"
|
8
8
|
description = "Lakehouse task runner powered by DuckDB for Microsoft Fabric"
|
9
9
|
readme = "README.md"
|
10
10
|
license = {text = "MIT"}
|
@@ -14,7 +14,7 @@ authors = [
|
|
14
14
|
requires-python = ">=3.9"
|
15
15
|
dependencies = [
|
16
16
|
"duckdb>=1.2.0",
|
17
|
-
"deltalake",
|
17
|
+
"deltalake<=0.18.2",
|
18
18
|
"requests>=2.28.0",
|
19
19
|
"obstore>=0.2.0"
|
20
20
|
]
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|