duckrun 0.1.7__py3-none-any.whl → 0.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
duckrun/core.py CHANGED
@@ -8,6 +8,9 @@ from string import Template
8
8
  import obstore as obs
9
9
  from obstore.store import AzureStore
10
10
 
11
+ # Row group size in rows, passed to write_deltalake as max_rows_per_file, max_rows_per_group and min_rows_per_group for optimal Delta Lake performance
12
+ RG = 8_000_000
13
+
11
14
 
12
15
  class DeltaWriter:
13
16
  """Spark-style write API for Delta Lake"""
@@ -48,7 +51,7 @@ class DeltaWriter:
48
51
  df = self.relation.record_batch()
49
52
 
50
53
  print(f"Writing to Delta table: {schema}.{table} (mode={self._mode})")
51
- write_deltalake(path, df, mode=self._mode)
54
+ write_deltalake(path, df, mode=self._mode, max_rows_per_file=RG, max_rows_per_group=RG, min_rows_per_group=RG)
52
55
 
53
56
  self.duckrun.con.sql(f"DROP VIEW IF EXISTS {table}")
54
57
  self.duckrun.con.sql(f"""
@@ -406,7 +409,7 @@ class Duckrun:
406
409
  if mode == 'overwrite':
407
410
  self.con.sql(f"DROP VIEW IF EXISTS {normalized_table}")
408
411
  df = self.con.sql(sql).record_batch()
409
- write_deltalake(path, df, mode='overwrite')
412
+ write_deltalake(path, df, mode='overwrite', max_rows_per_file=RG, max_rows_per_group=RG, min_rows_per_group=RG)
410
413
  self.con.sql(f"CREATE OR REPLACE VIEW {normalized_table} AS SELECT * FROM delta_scan('{path}')")
411
414
  dt = DeltaTable(path)
412
415
  dt.vacuum(retention_hours=0, dry_run=False, enforce_retention_duration=False)
@@ -414,7 +417,7 @@ class Duckrun:
414
417
 
415
418
  elif mode == 'append':
416
419
  df = self.con.sql(sql).record_batch()
417
- write_deltalake(path, df, mode='append')
420
+ write_deltalake(path, df, mode='append', max_rows_per_file=RG, max_rows_per_group=RG, min_rows_per_group=RG)
418
421
  self.con.sql(f"CREATE OR REPLACE VIEW {normalized_table} AS SELECT * FROM delta_scan('{path}')")
419
422
  dt = DeltaTable(path)
420
423
  if len(dt.file_uris()) > self.compaction_threshold:
@@ -431,7 +434,7 @@ class Duckrun:
431
434
  print(f"Table {normalized_table} doesn't exist. Creating...")
432
435
  self.con.sql(f"DROP VIEW IF EXISTS {normalized_table}")
433
436
  df = self.con.sql(sql).record_batch()
434
- write_deltalake(path, df, mode='overwrite')
437
+ write_deltalake(path, df, mode='overwrite', max_rows_per_file=RG, max_rows_per_group=RG, min_rows_per_group=RG)
435
438
  self.con.sql(f"CREATE OR REPLACE VIEW {normalized_table} AS SELECT * FROM delta_scan('{path}')")
436
439
  dt = DeltaTable(path)
437
440
  dt.vacuum(dry_run=False)
@@ -450,6 +453,7 @@ class Duckrun:
450
453
 
451
454
  Returns:
452
455
  True if all tasks succeeded
456
+ False if any task failed (raised an exception) or a Python task returned 0 (early exit)
453
457
  """
454
458
  if self.sql_folder is None:
455
459
  raise RuntimeError("sql_folder is not configured. Cannot run pipelines.")
@@ -460,22 +464,33 @@ class Duckrun:
460
464
  print('='*60)
461
465
 
462
466
  try:
467
+ result = None
468
+
463
469
  if len(task) == 2:
464
470
  name, second = task
465
471
  if isinstance(second, str) and second in {'overwrite', 'append', 'ignore'}:
466
- self._run_sql(name, second, {})
472
+ result = self._run_sql(name, second, {})
467
473
  else:
468
474
  args = second if isinstance(second, (tuple, list)) else (second,)
469
- self._run_python(name, tuple(args))
475
+ result = self._run_python(name, tuple(args))
470
476
 
471
477
  elif len(task) == 3:
472
478
  table, mode, params = task
473
479
  if not isinstance(params, dict):
474
480
  raise ValueError(f"Expected dict for params, got {type(params)}")
475
- self._run_sql(table, mode, params)
481
+ result = self._run_sql(table, mode, params)
476
482
 
477
483
  else:
478
484
  raise ValueError(f"Invalid task format: {task}")
485
+
486
+ # Check if Python task returned 0 (early exit condition)
487
+ # Only Python tasks are checked: SQL tasks return table names (strings) and stop the pipeline only on exceptions
488
+ if (len(task) == 2 and
489
+ not isinstance(task[1], str) and
490
+ result == 0):
491
+ print(f"\n⏹️ Python task {i} returned 0 - stopping pipeline execution")
492
+ print(f" Remaining tasks ({len(pipeline) - i}) will not be executed")
493
+ return False
479
494
 
480
495
  except Exception as e:
481
496
  print(f"\n❌ Task {i} failed: {e}")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: duckrun
3
- Version: 0.1.7
3
+ Version: 0.1.9
4
4
  Summary: Lakehouse task runner powered by DuckDB for Microsoft Fabric
5
5
  Author: mim
6
6
  License: MIT
@@ -11,7 +11,7 @@ Requires-Python: >=3.9
11
11
  Description-Content-Type: text/markdown
12
12
  License-File: LICENSE
13
13
  Requires-Dist: duckdb>=1.2.0
14
- Requires-Dist: deltalake>=0.18.2
14
+ Requires-Dist: deltalake<=0.18.2
15
15
  Requires-Dist: requests>=2.28.0
16
16
  Requires-Dist: obstore>=0.2.0
17
17
  Provides-Extra: local
@@ -0,0 +1,7 @@
1
+ duckrun/__init__.py,sha256=L0jRtD9Ld8Ti4e6GRvPDdHvkQCFAPHM43GSP7ARh6EM,241
2
+ duckrun/core.py,sha256=0ShFCe9tPmrd2fsH_AHCwzSQ03gXbaWWwmgNTyi5pYo,34064
3
+ duckrun-0.1.9.dist-info/licenses/LICENSE,sha256=-DeQQwdbCbkB4507ZF3QbocysB-EIjDtaLexvqRkGZc,1083
4
+ duckrun-0.1.9.dist-info/METADATA,sha256=vno1WFSAviBvJfBc09zPmELxYD6LTmpIWj52sqUYRaE,13847
5
+ duckrun-0.1.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
6
+ duckrun-0.1.9.dist-info/top_level.txt,sha256=BknMEwebbUHrVAp3SC92ps8MPhK7XSYsaogTvi_DmEU,8
7
+ duckrun-0.1.9.dist-info/RECORD,,
@@ -1,7 +0,0 @@
1
- duckrun/__init__.py,sha256=L0jRtD9Ld8Ti4e6GRvPDdHvkQCFAPHM43GSP7ARh6EM,241
2
- duckrun/core.py,sha256=PzeY1WJVhAGTOuN5Yf86oNhKpK_zw6GYdylZ_BdSJfg,32982
3
- duckrun-0.1.7.dist-info/licenses/LICENSE,sha256=-DeQQwdbCbkB4507ZF3QbocysB-EIjDtaLexvqRkGZc,1083
4
- duckrun-0.1.7.dist-info/METADATA,sha256=BIsqAq6Z1JwSv7RwJ6wthzTC7xKSDeigZfVom5RJH0s,13847
5
- duckrun-0.1.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
6
- duckrun-0.1.7.dist-info/top_level.txt,sha256=BknMEwebbUHrVAp3SC92ps8MPhK7XSYsaogTvi_DmEU,8
7
- duckrun-0.1.7.dist-info/RECORD,,