databricks-bundle-decorators 0.12.2__tar.gz → 0.12.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/PKG-INFO +1 -1
- {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/pyproject.toml +1 -1
- {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/backfill.py +84 -0
- {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/io_manager.py +3 -3
- {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/io_managers/polars_delta.py +12 -2
- {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/io_managers/spark_delta.py +5 -1
- {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/io_managers/spark_uc.py +10 -2
- {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/merge.py +23 -5
- {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/README.md +0 -0
- {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/__init__.py +0 -0
- {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/app/__init__.py +0 -0
- {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/app/_app.py +0 -0
- {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/app/_codegen.py +0 -0
- {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/app/_compute.py +0 -0
- {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/app/_data.py +0 -0
- {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/app/_display.py +0 -0
- {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/app/_fetch.py +0 -0
- {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/app/_figures.py +0 -0
- {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/app/_pages.py +0 -0
- {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/app/_template.py +0 -0
- {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/cli.py +0 -0
- {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/codegen.py +0 -0
- {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/context.py +0 -0
- {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/decorators.py +0 -0
- {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/discovery.py +0 -0
- {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/io_managers/__init__.py +0 -0
- {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/io_managers/polars_csv.py +0 -0
- {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/io_managers/polars_json.py +0 -0
- {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/io_managers/polars_parquet.py +0 -0
- {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/io_managers/spark_parquet.py +0 -0
- {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/registry.py +0 -0
- {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/runtime.py +0 -0
- {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/sdk_types.py +0 -0
- {databricks_bundle_decorators-0.12.2 → databricks_bundle_decorators-0.12.4}/src/databricks_bundle_decorators/task_values.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: databricks-bundle-decorators
|
|
3
|
-
Version: 0.12.2
|
|
3
|
+
Version: 0.12.4
|
|
4
4
|
Summary: Decorator-based framework for defining Databricks jobs and tasks as Python code.
|
|
5
5
|
Author: boccileonardo
|
|
6
6
|
Author-email: boccileonardo <leonardobocci99@hotmail.com>
|
|
@@ -4,7 +4,7 @@ requires = [ "uv-build>=0.10.4,<1" ]
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "databricks-bundle-decorators"
|
|
7
|
-
version = "0.12.2"
|
|
7
|
+
version = "0.12.4"
|
|
8
8
|
description = "Decorator-based framework for defining Databricks jobs and tasks as Python code."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
authors = [ { name = "boccileonardo", email = "leonardobocci99@hotmail.com" } ]
|
|
@@ -278,6 +278,27 @@ class DailyBackfill(BackfillDef):
|
|
|
278
278
|
if self.data_lag:
|
|
279
279
|
e = e.subtract(days=self.data_lag)
|
|
280
280
|
|
|
281
|
+
# Clamp overrides to definition bounds
|
|
282
|
+
defn_start = self._parse(self.start_date)
|
|
283
|
+
if s < defn_start:
|
|
284
|
+
_logger.warning(
|
|
285
|
+
"Start '%s' is before the backfill start_date '%s'; "
|
|
286
|
+
"skipping out-of-range periods",
|
|
287
|
+
start,
|
|
288
|
+
self.start_date,
|
|
289
|
+
)
|
|
290
|
+
s = defn_start
|
|
291
|
+
if self.end_date is not None:
|
|
292
|
+
defn_end = self._parse(self.end_date)
|
|
293
|
+
if e > defn_end:
|
|
294
|
+
_logger.warning(
|
|
295
|
+
"End '%s' is after the backfill end_date '%s'; "
|
|
296
|
+
"skipping out-of-range periods",
|
|
297
|
+
end,
|
|
298
|
+
self.end_date,
|
|
299
|
+
)
|
|
300
|
+
e = defn_end
|
|
301
|
+
|
|
281
302
|
keys: list[str] = []
|
|
282
303
|
while s <= e:
|
|
283
304
|
keys.append(s.format(self._FMT))
|
|
@@ -354,6 +375,27 @@ class WeeklyBackfill(BackfillDef):
|
|
|
354
375
|
weekday_offset = today.day_of_week().value - 1 # 0 for Monday
|
|
355
376
|
e = today.subtract(days=weekday_offset)
|
|
356
377
|
|
|
378
|
+
# Clamp overrides to definition bounds
|
|
379
|
+
defn_start = self._parse_iso_week(self.start_date)
|
|
380
|
+
if s < defn_start:
|
|
381
|
+
_logger.warning(
|
|
382
|
+
"Start '%s' is before the backfill start_date '%s'; "
|
|
383
|
+
"skipping out-of-range periods",
|
|
384
|
+
start,
|
|
385
|
+
self.start_date,
|
|
386
|
+
)
|
|
387
|
+
s = defn_start
|
|
388
|
+
if self.end_date is not None:
|
|
389
|
+
defn_end = self._parse_iso_week(self.end_date)
|
|
390
|
+
if e > defn_end:
|
|
391
|
+
_logger.warning(
|
|
392
|
+
"End '%s' is after the backfill end_date '%s'; "
|
|
393
|
+
"skipping out-of-range periods",
|
|
394
|
+
end,
|
|
395
|
+
self.end_date,
|
|
396
|
+
)
|
|
397
|
+
e = defn_end
|
|
398
|
+
|
|
357
399
|
keys: list[str] = []
|
|
358
400
|
while s <= e:
|
|
359
401
|
keys.append(self._fmt_iso_week(s))
|
|
@@ -422,6 +464,27 @@ class MonthlyBackfill(BackfillDef):
|
|
|
422
464
|
# Current month
|
|
423
465
|
e = today.replace(day=1)
|
|
424
466
|
|
|
467
|
+
# Clamp overrides to definition bounds
|
|
468
|
+
defn_start = self._parse_month(self.start_date)
|
|
469
|
+
if s < defn_start:
|
|
470
|
+
_logger.warning(
|
|
471
|
+
"Start '%s' is before the backfill start_date '%s'; "
|
|
472
|
+
"skipping out-of-range periods",
|
|
473
|
+
start,
|
|
474
|
+
self.start_date,
|
|
475
|
+
)
|
|
476
|
+
s = defn_start
|
|
477
|
+
if self.end_date is not None:
|
|
478
|
+
defn_end = self._parse_month(self.end_date)
|
|
479
|
+
if e > defn_end:
|
|
480
|
+
_logger.warning(
|
|
481
|
+
"End '%s' is after the backfill end_date '%s'; "
|
|
482
|
+
"skipping out-of-range periods",
|
|
483
|
+
end,
|
|
484
|
+
self.end_date,
|
|
485
|
+
)
|
|
486
|
+
e = defn_end
|
|
487
|
+
|
|
425
488
|
keys: list[str] = []
|
|
426
489
|
while s <= e:
|
|
427
490
|
keys.append(s.format(self._FMT))
|
|
@@ -502,6 +565,27 @@ class HourlyBackfill(BackfillDef):
|
|
|
502
565
|
if self.data_lag:
|
|
503
566
|
e = e.subtract(hours=self.data_lag)
|
|
504
567
|
|
|
568
|
+
# Clamp overrides to definition bounds
|
|
569
|
+
defn_start = self._parse_hour(self.start_date)
|
|
570
|
+
if s < defn_start:
|
|
571
|
+
_logger.warning(
|
|
572
|
+
"Start '%s' is before the backfill start_date '%s'; "
|
|
573
|
+
"skipping out-of-range periods",
|
|
574
|
+
start,
|
|
575
|
+
self.start_date,
|
|
576
|
+
)
|
|
577
|
+
s = defn_start
|
|
578
|
+
if self.end_date is not None:
|
|
579
|
+
defn_end = self._parse_hour(self.end_date)
|
|
580
|
+
if e > defn_end:
|
|
581
|
+
_logger.warning(
|
|
582
|
+
"End '%s' is after the backfill end_date '%s'; "
|
|
583
|
+
"skipping out-of-range periods",
|
|
584
|
+
end,
|
|
585
|
+
self.end_date,
|
|
586
|
+
)
|
|
587
|
+
e = defn_end
|
|
588
|
+
|
|
505
589
|
keys: list[str] = []
|
|
506
590
|
seen: set[str] = set()
|
|
507
591
|
cur = s
|
|
@@ -354,13 +354,13 @@ class IoManager(ABC):
|
|
|
354
354
|
before_sleep_log,
|
|
355
355
|
retry,
|
|
356
356
|
stop_after_attempt,
|
|
357
|
-
|
|
357
|
+
wait_exponential_jitter,
|
|
358
358
|
)
|
|
359
359
|
|
|
360
360
|
retryer = retry(
|
|
361
361
|
stop=stop_after_attempt(self.retry.max_attempts),
|
|
362
|
-
wait=
|
|
363
|
-
|
|
362
|
+
wait=wait_exponential_jitter(
|
|
363
|
+
initial=self.retry.delay,
|
|
364
364
|
exp_base=self.retry.backoff_factor,
|
|
365
365
|
),
|
|
366
366
|
reraise=True,
|
|
@@ -202,7 +202,12 @@ class PolarsDeltaIoManager(IoManager):
|
|
|
202
202
|
merger = obj._build_merger(uri, storage_options=self.storage_options)
|
|
203
203
|
if merger is None:
|
|
204
204
|
# Target table doesn't exist yet — write source data directly.
|
|
205
|
-
obj._initial_write(uri, storage_options=self.storage_options)
|
|
205
|
+
obj._initial_write(
|
|
206
|
+
uri,
|
|
207
|
+
storage_options=self.storage_options,
|
|
208
|
+
partition_by=context.partition_by,
|
|
209
|
+
write_options=dict(self._write_options),
|
|
210
|
+
)
|
|
206
211
|
else:
|
|
207
212
|
merger.execute()
|
|
208
213
|
self._last_partition_values = {}
|
|
@@ -287,7 +292,12 @@ class PolarsDeltaIoManager(IoManager):
|
|
|
287
292
|
def _execute_merge() -> None:
|
|
288
293
|
merger = obj._build_merger(uri, storage_options=self.storage_options)
|
|
289
294
|
if merger is None:
|
|
290
|
-
obj._initial_write(uri, storage_options=self.storage_options)
|
|
295
|
+
obj._initial_write(
|
|
296
|
+
uri,
|
|
297
|
+
storage_options=self.storage_options,
|
|
298
|
+
partition_by=context.partition_by,
|
|
299
|
+
write_options=dict(self._write_options),
|
|
300
|
+
)
|
|
291
301
|
else:
|
|
292
302
|
merger.execute()
|
|
293
303
|
|
|
@@ -89,7 +89,11 @@ class _SparkDeltaBase(IoManager):
|
|
|
89
89
|
)
|
|
90
90
|
builder = obj._build_spark_merger(uri)
|
|
91
91
|
if builder is None:
|
|
92
|
-
obj._initial_spark_write(uri)
|
|
92
|
+
obj._initial_spark_write(
|
|
93
|
+
uri,
|
|
94
|
+
partition_by=context.partition_by,
|
|
95
|
+
write_options=dict(self._write_options),
|
|
96
|
+
)
|
|
93
97
|
else:
|
|
94
98
|
builder.execute()
|
|
95
99
|
self._last_partition_values = {}
|
|
@@ -147,7 +147,11 @@ class SparkUCTableIoManager(IoManager):
|
|
|
147
147
|
)
|
|
148
148
|
builder = obj._build_spark_merger(table)
|
|
149
149
|
if builder is None:
|
|
150
|
-
obj._initial_spark_write(table)
|
|
150
|
+
obj._initial_spark_write(
|
|
151
|
+
table,
|
|
152
|
+
partition_by=context.partition_by,
|
|
153
|
+
write_options=dict(self._write_options),
|
|
154
|
+
)
|
|
151
155
|
else:
|
|
152
156
|
builder.execute()
|
|
153
157
|
self._last_partition_values = {}
|
|
@@ -325,7 +329,11 @@ class SparkUCVolumeDeltaIoManager(IoManager):
|
|
|
325
329
|
)
|
|
326
330
|
builder = obj._build_spark_merger(uri)
|
|
327
331
|
if builder is None:
|
|
328
|
-
obj._initial_spark_write(uri)
|
|
332
|
+
obj._initial_spark_write(
|
|
333
|
+
uri,
|
|
334
|
+
partition_by=context.partition_by,
|
|
335
|
+
write_options=dict(self._write_options),
|
|
336
|
+
)
|
|
329
337
|
else:
|
|
330
338
|
builder.execute()
|
|
331
339
|
self._last_partition_values = {}
|
|
@@ -256,7 +256,11 @@ class DeltaMerge:
|
|
|
256
256
|
return type(self.source).__module__.startswith("pyspark.")
|
|
257
257
|
|
|
258
258
|
def _initial_write(
|
|
259
|
-
self, table_uri: str, storage_options: dict[str, str] | None = None
|
|
259
|
+
self,
|
|
260
|
+
table_uri: str,
|
|
261
|
+
storage_options: dict[str, str] | None = None,
|
|
262
|
+
partition_by: list[str] | None = None,
|
|
263
|
+
write_options: dict[str, Any] | None = None,
|
|
260
264
|
) -> None:
|
|
261
265
|
"""Write source data directly when the target table doesn't exist yet.
|
|
262
266
|
|
|
@@ -265,15 +269,20 @@ class DeltaMerge:
|
|
|
265
269
|
"""
|
|
266
270
|
import polars as pl # noqa: PLC0415
|
|
267
271
|
|
|
272
|
+
opts: dict[str, Any] = dict(write_options or {})
|
|
273
|
+
if partition_by:
|
|
274
|
+
delta_opts: dict[str, Any] = opts.setdefault("delta_write_options", {})
|
|
275
|
+
delta_opts.setdefault("partition_by", partition_by)
|
|
276
|
+
|
|
268
277
|
source = self.source
|
|
269
278
|
if isinstance(source, pl.LazyFrame):
|
|
270
|
-
source.sink_delta(table_uri, storage_options=storage_options)
|
|
279
|
+
source.sink_delta(table_uri, storage_options=storage_options, **opts)
|
|
271
280
|
elif isinstance(source, pl.DataFrame):
|
|
272
|
-
source.write_delta(table_uri, storage_options=storage_options)
|
|
281
|
+
source.write_delta(table_uri, storage_options=storage_options, **opts)
|
|
273
282
|
else:
|
|
274
283
|
# PyArrow or other — convert to polars first
|
|
275
284
|
df = pl.DataFrame(pl.from_arrow(source))
|
|
276
|
-
df.write_delta(table_uri, storage_options=storage_options)
|
|
285
|
+
df.write_delta(table_uri, storage_options=storage_options, **opts)
|
|
277
286
|
|
|
278
287
|
def _build_spark_merger(self, table_identifier: str) -> Any:
|
|
279
288
|
"""Build a fresh ``delta.tables.DeltaMergeBuilder`` for Spark.
|
|
@@ -329,7 +338,12 @@ class DeltaMerge:
|
|
|
329
338
|
|
|
330
339
|
return builder
|
|
331
340
|
|
|
332
|
-
def _initial_spark_write(self, table_identifier: str) -> None:
|
|
341
|
+
def _initial_spark_write(
|
|
342
|
+
self,
|
|
343
|
+
table_identifier: str,
|
|
344
|
+
partition_by: list[str] | None = None,
|
|
345
|
+
write_options: dict[str, str] | None = None,
|
|
346
|
+
) -> None:
|
|
333
347
|
"""Write source Spark DataFrame directly when the target doesn't exist.
|
|
334
348
|
|
|
335
349
|
Called by the Spark IoManager on first run.
|
|
@@ -339,6 +353,10 @@ class DeltaMerge:
|
|
|
339
353
|
)
|
|
340
354
|
|
|
341
355
|
writer = self.source.write.format("delta").mode("error")
|
|
356
|
+
if partition_by:
|
|
357
|
+
writer = writer.partitionBy(*partition_by)
|
|
358
|
+
for k, v in (write_options or {}).items():
|
|
359
|
+
writer = writer.option(k, v)
|
|
342
360
|
if is_path:
|
|
343
361
|
writer.save(table_identifier)
|
|
344
362
|
else:
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|