databricks-bundle-decorators 0.12.2__tar.gz → 0.12.4__tar.gz

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (34)
  1. {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/PKG-INFO +1 -1
  2. {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/pyproject.toml +1 -1
  3. {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/backfill.py +84 -0
  4. {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/io_manager.py +3 -3
  5. {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/io_managers/polars_delta.py +12 -2
  6. {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/io_managers/spark_delta.py +5 -1
  7. {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/io_managers/spark_uc.py +10 -2
  8. {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/merge.py +23 -5
  9. {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/README.md +0 -0
  10. {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/__init__.py +0 -0
  11. {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/app/__init__.py +0 -0
  12. {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/app/_app.py +0 -0
  13. {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/app/_codegen.py +0 -0
  14. {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/app/_compute.py +0 -0
  15. {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/app/_data.py +0 -0
  16. {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/app/_display.py +0 -0
  17. {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/app/_fetch.py +0 -0
  18. {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/app/_figures.py +0 -0
  19. {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/app/_pages.py +0 -0
  20. {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/app/_template.py +0 -0
  21. {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/cli.py +0 -0
  22. {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/codegen.py +0 -0
  23. {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/context.py +0 -0
  24. {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/decorators.py +0 -0
  25. {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/discovery.py +0 -0
  26. {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/io_managers/__init__.py +0 -0
  27. {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/io_managers/polars_csv.py +0 -0
  28. {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/io_managers/polars_json.py +0 -0
  29. {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/io_managers/polars_parquet.py +0 -0
  30. {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/io_managers/spark_parquet.py +0 -0
  31. {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/registry.py +0 -0
  32. {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/runtime.py +0 -0
  33. {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/sdk_types.py +0 -0
  34. {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/task_values.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: databricks-bundle-decorators
3
- Version: 0.12.2
3
+ Version: 0.12.4
4
4
  Summary: Decorator-based framework for defining Databricks jobs and tasks as Python code.
5
5
  Author: boccileonardo
6
6
  Author-email: boccileonardo <leonardobocci99@hotmail.com>
@@ -4,7 +4,7 @@ requires = [ "uv-build>=0.10.4,<1" ]
4
4
 
5
5
  [project]
6
6
  name = "databricks-bundle-decorators"
7
- version = "0.12.2"
7
+ version = "0.12.4"
8
8
  description = "Decorator-based framework for defining Databricks jobs and tasks as Python code."
9
9
  readme = "README.md"
10
10
  authors = [ { name = "boccileonardo", email = "leonardobocci99@hotmail.com" } ]
@@ -278,6 +278,27 @@ class DailyBackfill(BackfillDef):
278
278
  if self.data_lag:
279
279
  e = e.subtract(days=self.data_lag)
280
280
 
281
+ # Clamp overrides to definition bounds
282
+ defn_start = self._parse(self.start_date)
283
+ if s < defn_start:
284
+ _logger.warning(
285
+ "Start '%s' is before the backfill start_date '%s'; "
286
+ "skipping out-of-range periods",
287
+ start,
288
+ self.start_date,
289
+ )
290
+ s = defn_start
291
+ if self.end_date is not None:
292
+ defn_end = self._parse(self.end_date)
293
+ if e > defn_end:
294
+ _logger.warning(
295
+ "End '%s' is after the backfill end_date '%s'; "
296
+ "skipping out-of-range periods",
297
+ end,
298
+ self.end_date,
299
+ )
300
+ e = defn_end
301
+
281
302
  keys: list[str] = []
282
303
  while s <= e:
283
304
  keys.append(s.format(self._FMT))
@@ -354,6 +375,27 @@ class WeeklyBackfill(BackfillDef):
354
375
  weekday_offset = today.day_of_week().value - 1 # 0 for Monday
355
376
  e = today.subtract(days=weekday_offset)
356
377
 
378
+ # Clamp overrides to definition bounds
379
+ defn_start = self._parse_iso_week(self.start_date)
380
+ if s < defn_start:
381
+ _logger.warning(
382
+ "Start '%s' is before the backfill start_date '%s'; "
383
+ "skipping out-of-range periods",
384
+ start,
385
+ self.start_date,
386
+ )
387
+ s = defn_start
388
+ if self.end_date is not None:
389
+ defn_end = self._parse_iso_week(self.end_date)
390
+ if e > defn_end:
391
+ _logger.warning(
392
+ "End '%s' is after the backfill end_date '%s'; "
393
+ "skipping out-of-range periods",
394
+ end,
395
+ self.end_date,
396
+ )
397
+ e = defn_end
398
+
357
399
  keys: list[str] = []
358
400
  while s <= e:
359
401
  keys.append(self._fmt_iso_week(s))
@@ -422,6 +464,27 @@ class MonthlyBackfill(BackfillDef):
422
464
  # Current month
423
465
  e = today.replace(day=1)
424
466
 
467
+ # Clamp overrides to definition bounds
468
+ defn_start = self._parse_month(self.start_date)
469
+ if s < defn_start:
470
+ _logger.warning(
471
+ "Start '%s' is before the backfill start_date '%s'; "
472
+ "skipping out-of-range periods",
473
+ start,
474
+ self.start_date,
475
+ )
476
+ s = defn_start
477
+ if self.end_date is not None:
478
+ defn_end = self._parse_month(self.end_date)
479
+ if e > defn_end:
480
+ _logger.warning(
481
+ "End '%s' is after the backfill end_date '%s'; "
482
+ "skipping out-of-range periods",
483
+ end,
484
+ self.end_date,
485
+ )
486
+ e = defn_end
487
+
425
488
  keys: list[str] = []
426
489
  while s <= e:
427
490
  keys.append(s.format(self._FMT))
@@ -502,6 +565,27 @@ class HourlyBackfill(BackfillDef):
502
565
  if self.data_lag:
503
566
  e = e.subtract(hours=self.data_lag)
504
567
 
568
+ # Clamp overrides to definition bounds
569
+ defn_start = self._parse_hour(self.start_date)
570
+ if s < defn_start:
571
+ _logger.warning(
572
+ "Start '%s' is before the backfill start_date '%s'; "
573
+ "skipping out-of-range periods",
574
+ start,
575
+ self.start_date,
576
+ )
577
+ s = defn_start
578
+ if self.end_date is not None:
579
+ defn_end = self._parse_hour(self.end_date)
580
+ if e > defn_end:
581
+ _logger.warning(
582
+ "End '%s' is after the backfill end_date '%s'; "
583
+ "skipping out-of-range periods",
584
+ end,
585
+ self.end_date,
586
+ )
587
+ e = defn_end
588
+
505
589
  keys: list[str] = []
506
590
  seen: set[str] = set()
507
591
  cur = s
@@ -354,13 +354,13 @@ class IoManager(ABC):
354
354
  before_sleep_log,
355
355
  retry,
356
356
  stop_after_attempt,
357
- wait_exponential,
357
+ wait_exponential_jitter,
358
358
  )
359
359
 
360
360
  retryer = retry(
361
361
  stop=stop_after_attempt(self.retry.max_attempts),
362
- wait=wait_exponential(
363
- multiplier=self.retry.delay,
362
+ wait=wait_exponential_jitter(
363
+ initial=self.retry.delay,
364
364
  exp_base=self.retry.backoff_factor,
365
365
  ),
366
366
  reraise=True,
@@ -202,7 +202,12 @@ class PolarsDeltaIoManager(IoManager):
202
202
  merger = obj._build_merger(uri, storage_options=self.storage_options)
203
203
  if merger is None:
204
204
  # Target table doesn't exist yet — write source data directly.
205
- obj._initial_write(uri, storage_options=self.storage_options)
205
+ obj._initial_write(
206
+ uri,
207
+ storage_options=self.storage_options,
208
+ partition_by=context.partition_by,
209
+ write_options=dict(self._write_options),
210
+ )
206
211
  else:
207
212
  merger.execute()
208
213
  self._last_partition_values = {}
@@ -287,7 +292,12 @@ class PolarsDeltaIoManager(IoManager):
287
292
  def _execute_merge() -> None:
288
293
  merger = obj._build_merger(uri, storage_options=self.storage_options)
289
294
  if merger is None:
290
- obj._initial_write(uri, storage_options=self.storage_options)
295
+ obj._initial_write(
296
+ uri,
297
+ storage_options=self.storage_options,
298
+ partition_by=context.partition_by,
299
+ write_options=dict(self._write_options),
300
+ )
291
301
  else:
292
302
  merger.execute()
293
303
 
@@ -89,7 +89,11 @@ class _SparkDeltaBase(IoManager):
89
89
  )
90
90
  builder = obj._build_spark_merger(uri)
91
91
  if builder is None:
92
- obj._initial_spark_write(uri)
92
+ obj._initial_spark_write(
93
+ uri,
94
+ partition_by=context.partition_by,
95
+ write_options=dict(self._write_options),
96
+ )
93
97
  else:
94
98
  builder.execute()
95
99
  self._last_partition_values = {}
@@ -147,7 +147,11 @@ class SparkUCTableIoManager(IoManager):
147
147
  )
148
148
  builder = obj._build_spark_merger(table)
149
149
  if builder is None:
150
- obj._initial_spark_write(table)
150
+ obj._initial_spark_write(
151
+ table,
152
+ partition_by=context.partition_by,
153
+ write_options=dict(self._write_options),
154
+ )
151
155
  else:
152
156
  builder.execute()
153
157
  self._last_partition_values = {}
@@ -325,7 +329,11 @@ class SparkUCVolumeDeltaIoManager(IoManager):
325
329
  )
326
330
  builder = obj._build_spark_merger(uri)
327
331
  if builder is None:
328
- obj._initial_spark_write(uri)
332
+ obj._initial_spark_write(
333
+ uri,
334
+ partition_by=context.partition_by,
335
+ write_options=dict(self._write_options),
336
+ )
329
337
  else:
330
338
  builder.execute()
331
339
  self._last_partition_values = {}
@@ -256,7 +256,11 @@ class DeltaMerge:
256
256
  return type(self.source).__module__.startswith("pyspark.")
257
257
 
258
258
  def _initial_write(
259
- self, table_uri: str, storage_options: dict[str, str] | None = None
259
+ self,
260
+ table_uri: str,
261
+ storage_options: dict[str, str] | None = None,
262
+ partition_by: list[str] | None = None,
263
+ write_options: dict[str, Any] | None = None,
260
264
  ) -> None:
261
265
  """Write source data directly when the target table doesn't exist yet.
262
266
 
@@ -265,15 +269,20 @@ class DeltaMerge:
265
269
  """
266
270
  import polars as pl # noqa: PLC0415
267
271
 
272
+ opts: dict[str, Any] = dict(write_options or {})
273
+ if partition_by:
274
+ delta_opts: dict[str, Any] = opts.setdefault("delta_write_options", {})
275
+ delta_opts.setdefault("partition_by", partition_by)
276
+
268
277
  source = self.source
269
278
  if isinstance(source, pl.LazyFrame):
270
- source.sink_delta(table_uri, storage_options=storage_options)
279
+ source.sink_delta(table_uri, storage_options=storage_options, **opts)
271
280
  elif isinstance(source, pl.DataFrame):
272
- source.write_delta(table_uri, storage_options=storage_options)
281
+ source.write_delta(table_uri, storage_options=storage_options, **opts)
273
282
  else:
274
283
  # PyArrow or other — convert to polars first
275
284
  df = pl.DataFrame(pl.from_arrow(source))
276
- df.write_delta(table_uri, storage_options=storage_options)
285
+ df.write_delta(table_uri, storage_options=storage_options, **opts)
277
286
 
278
287
  def _build_spark_merger(self, table_identifier: str) -> Any:
279
288
  """Build a fresh ``delta.tables.DeltaMergeBuilder`` for Spark.
@@ -329,7 +338,12 @@ class DeltaMerge:
329
338
 
330
339
  return builder
331
340
 
332
- def _initial_spark_write(self, table_identifier: str) -> None:
341
+ def _initial_spark_write(
342
+ self,
343
+ table_identifier: str,
344
+ partition_by: list[str] | None = None,
345
+ write_options: dict[str, str] | None = None,
346
+ ) -> None:
333
347
  """Write source Spark DataFrame directly when the target doesn't exist.
334
348
 
335
349
  Called by the Spark IoManager on first run.
@@ -339,6 +353,10 @@ class DeltaMerge:
339
353
  )
340
354
 
341
355
  writer = self.source.write.format("delta").mode("error")
356
+ if partition_by:
357
+ writer = writer.partitionBy(*partition_by)
358
+ for k, v in (write_options or {}).items():
359
+ writer = writer.option(k, v)
342
360
  if is_path:
343
361
  writer.save(table_identifier)
344
362
  else: