fabricks 3.0.2__py3-none-any.whl → 3.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -301,9 +301,8 @@ class Configurator(ABC):
301
301
  """
302
302
  if self.mode == "memory":
303
303
  DEFAULT_LOGGER.debug("memory (no optimize)", extra={"job": self})
304
- else:
305
- assert self.table.exists()
306
304
 
305
+ else:
307
306
  if vacuum:
308
307
  self.vacuum()
309
308
  if optimize:
@@ -312,19 +311,23 @@ class Configurator(ABC):
312
311
  self.table.compute_statistics()
313
312
 
314
313
  def vacuum(self):
315
- job = self.options.table.get("retention_days")
316
- step = self.step_conf.get("table_options", {}).get("retention_days", None)
317
- runtime = CONF_RUNTIME.get("options", {}).get("retention_days")
318
-
319
- if job is not None:
320
- retention_days = job
321
- elif step:
322
- retention_days = step
314
+ if self.mode == "memory":
315
+ DEFAULT_LOGGER.debug("memory (no vacuum)", extra={"job": self})
316
+
323
317
  else:
324
- assert runtime
325
- retention_days = runtime
318
+ job = self.options.table.get("retention_days")
319
+ step = self.step_conf.get("table_options", {}).get("retention_days", None)
320
+ runtime = CONF_RUNTIME.get("options", {}).get("retention_days")
321
+
322
+ if job is not None:
323
+ retention_days = job
324
+ elif step:
325
+ retention_days = step
326
+ else:
327
+ assert runtime
328
+ retention_days = runtime
326
329
 
327
- self.table.vacuum(retention_days=retention_days)
330
+ self.table.vacuum(retention_days=retention_days)
328
331
 
329
332
  def __str__(self):
330
333
  return f"{self.step}.{self.topic}_{self.item}"
@@ -103,6 +103,21 @@ class Bronze(BaseJob):
103
103
  DEFAULT_LOGGER.debug("drop external table", extra={"job": self})
104
104
  self.spark.sql(f"drop table if exists {self.qualified_name}")
105
105
 
106
+ def analyze_external_table(self):
107
+ DEFAULT_LOGGER.debug("analyze external table", extra={"job": self})
108
+ self.spark.sql(f"analyze table {self.qualified_name} compute statistics")
109
+
110
+ def vacuum_external_table(self, retention_hours: Optional[int] = 168):
111
+ from delta import DeltaTable
112
+
113
+ DEFAULT_LOGGER.debug("vacuum external table", extra={"job": self})
114
+ try:
115
+ dt = DeltaTable.forPath(self.spark, self.data_path.string)
116
+ self.spark.sql("SET self.spark.databricks.delta.retentionDurationCheck.enabled = False")
117
+ dt.vacuum(retention_hours)
118
+ finally:
119
+ self.spark.sql("SET self.spark.databricks.delta.retentionDurationCheck.enabled = True")
120
+
106
121
  def optimize_external_table(
107
122
  self,
108
123
  vacuum: Optional[bool] = True,
@@ -110,20 +125,10 @@ class Bronze(BaseJob):
110
125
  ):
111
126
  DEFAULT_LOGGER.debug("optimize external table", extra={"job": self})
112
127
  if vacuum:
113
- from delta import DeltaTable
114
-
115
- dt = DeltaTable.forPath(self.spark, self.data_path.string)
116
- retention_days = 7
117
- DEFAULT_LOGGER.debug(f"{self.data_path} - vacuum table (removing files older than {retention_days} days)")
118
- try:
119
- self.spark.sql("SET self.spark.databricks.delta.retentionDurationCheck.enabled = False")
120
- dt.vacuum(retention_days * 24)
121
- finally:
122
- self.spark.sql("SET self.spark.databricks.delta.retentionDurationCheck.enabled = True")
128
+ self.vacuum_external_table()
123
129
 
124
130
  if analyze:
125
- DEFAULT_LOGGER.debug(f"{self.data_path} - compute delta statistics")
126
- self.spark.sql(f"analyze table delta.`{self.data_path}` compute delta statistics")
131
+ self.analyze_external_table()
127
132
 
128
133
  @property
129
134
  def parser(self) -> BaseParser:
@@ -370,6 +375,14 @@ class Bronze(BaseJob):
370
375
  else:
371
376
  super().optimize(vacuum=vacuum, optimize=optimize, analyze=analyze)
372
377
 
378
+ def vacuum(self):
379
+ if self.mode == "memory":
380
+ DEFAULT_LOGGER.info("memory (no vacuum)", extra={"job": self})
381
+ elif self.mode == "register":
382
+ self.vacuum_external_table()
383
+ else:
384
+ super().vacuum()
385
+
373
386
  def overwrite(self):
374
387
  self.truncate()
375
388
  self.run()
@@ -433,7 +433,11 @@ class Table(DbObject):
433
433
  cols = [
434
434
  f"`{name}`"
435
435
  for name, dtype in self.dataframe.dtypes
436
- if not dtype.startswith("struct") and not dtype.startswith("array") and name not in ["__metadata"]
436
+ if not dtype.startswith("struct")
437
+ and not dtype.startswith("array")
438
+ and not dtype.startswith("variant")
439
+ and not dtype.startswith("map")
440
+ and name not in ["__metadata"]
437
441
  ]
438
442
  cols = ", ".join(sorted(cols))
439
443
  self.spark.sql(f"analyze table {self.qualified_name} compute statistics for columns {cols}")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fabricks
3
- Version: 3.0.2
3
+ Version: 3.0.3
4
4
  Author-email: BMS DWH Team <bi_support@bmsuisse.ch>
5
5
  Requires-Python: <4,>=3.9
6
6
  Requires-Dist: azure-data-tables<13,>=12.5.0
@@ -105,7 +105,7 @@ fabricks/core/deploy/tables.py,sha256=HPQxdLfggSzL3PTqHnBaJnScXuROK_oko-vqjiXWrn
105
105
  fabricks/core/deploy/udfs.py,sha256=TBTwMJAuZpsShu_z-Z-LENo3P_eWX69qUkmFuRqv3kU,516
106
106
  fabricks/core/deploy/views.py,sha256=DAPCF1gijNGVioKUhRH_PS0koAYhoeDgMvBy-UJ6GQc,13756
107
107
  fabricks/core/jobs/__init__.py,sha256=W_1m6LoGiXBml_8cucedju_hllSSnZGKsZjyFh-2oJw,531
108
- fabricks/core/jobs/bronze.py,sha256=rufzX9sFiyu9bIkEC4NRKLOn3G1egj80I-liiFjpUbA,13183
108
+ fabricks/core/jobs/bronze.py,sha256=89nsucGyTGAYPd6pO-MZUmi031P62gp095PTYjSftjg,13514
109
109
  fabricks/core/jobs/get_job.py,sha256=35zay3Z_WoJIylzEQlGle6UvrE1EClfRbFEVGvszof0,3675
110
110
  fabricks/core/jobs/get_job_conf.py,sha256=3vAj_usCbNqejMUKOF85LPaHBYAwxgrDG7LYgY-vBUw,4812
111
111
  fabricks/core/jobs/get_job_id.py,sha256=6dLyzxGHlRvJZVJSwZkCk3iXzWkIhePC_6FhoP0gEN4,607
@@ -115,7 +115,7 @@ fabricks/core/jobs/silver.py,sha256=wn6c6hoeppjlWf6EutB_8qE5Sxu2PIWk5iQecRUuJ5o,
115
115
  fabricks/core/jobs/base/__init__.py,sha256=_AdWtyL7yZG2TOZ9e8WyNPrOjmm6EDkI_TNym5cLDws,208
116
116
  fabricks/core/jobs/base/_types.py,sha256=r_s1mG-kHErvC9tMX5ndNEgA_3ASD2ouEkYylbhe-18,6764
117
117
  fabricks/core/jobs/base/checker.py,sha256=LPK5f3ucT7T4Z7LjlOyHPXFfb94J_DdYVp6X85wIvDk,5324
118
- fabricks/core/jobs/base/configurator.py,sha256=671-ksuf5FDIUtCLmt_HhLYhUl34jUdKb7S1wiotBzk,11439
118
+ fabricks/core/jobs/base/configurator.py,sha256=ARj920yJJdNtD0Iz8IjbhSyJqlVrXzSz29n0mXofo-k,11569
119
119
  fabricks/core/jobs/base/exception.py,sha256=HrdxEuOfK5rY-ItZvEL3iywLgdpYUpmWFkjjjks7oYc,2318
120
120
  fabricks/core/jobs/base/generator.py,sha256=LdI3PDrwee5rjwlFlduA4_s-7rE1AsnFrYdgQJL5_tE,15527
121
121
  fabricks/core/jobs/base/invoker.py,sha256=xJV9fLtY36qfnclqKqNBsjryyR8x39wfhbYJtzOPRyM,6342
@@ -145,7 +145,7 @@ fabricks/metastore/_types.py,sha256=NXYxwQHP0sCllM0N6QBbaK4CdtM_m_rHFDxRNRfBcLU,
145
145
  fabricks/metastore/database.py,sha256=1EjbRh2b6xEdHJyc4C4xee6FXDiKuPgm-8Q3Gqt7eds,1942
146
146
  fabricks/metastore/dbobject.py,sha256=EdxofFMCx6XdqFkm9Z5x4ywW4sstvdpc1d_EhYsE0KY,1883
147
147
  fabricks/metastore/pyproject.toml,sha256=6RZM9RMKMDF_EAequhORZ7TD0BQNk7aBCTWAv-sRcp0,519
148
- fabricks/metastore/table.py,sha256=3rQZDOJZ2GchpVQ4N9Cu-SgEZI2TlUGqO9cWchDsitE,24196
148
+ fabricks/metastore/table.py,sha256=YywtGc6z_zMwvK9wORvvys50AB-gBd9ZGAdtQq614yc,24312
149
149
  fabricks/metastore/utils.py,sha256=8SxhjDkz_aSH4IGUusel7hqOQxP9U8PNBCY0M7GH00Y,1355
150
150
  fabricks/metastore/view.py,sha256=Va7xdFtOW9GcDSlyoZNgcF07qty9abtex41au6OSz6c,1381
151
151
  fabricks/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -171,6 +171,6 @@ fabricks/utils/schema/get_schema_for_type.py,sha256=u9FFYvWyq9VQdNJNu79-SCN9iGUB
171
171
  fabricks/utils/write/__init__.py,sha256=i0UnZenXj9Aq0b0_aU3s6882vg-Vu_AyKfQhl_dTp-g,200
172
172
  fabricks/utils/write/delta.py,sha256=mpaSxBNcl6N0QheGLx8rjeyWUvy1Yvvj4raGRv7GL5M,1229
173
173
  fabricks/utils/write/stream.py,sha256=wQBpAnQtYA6nl79sPKhVM6u5m-66suX7B6VQ6tW4TOs,622
174
- fabricks-3.0.2.dist-info/METADATA,sha256=paAVxNEsF-lMQTD7iOyTAnclYtDDVP-M3xTzCzcwOjI,682
175
- fabricks-3.0.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
176
- fabricks-3.0.2.dist-info/RECORD,,
174
+ fabricks-3.0.3.dist-info/METADATA,sha256=dztXMj5CrebPZZmMtr-LdpcS2Zg6sO0x0OCUzwLik3s,682
175
+ fabricks-3.0.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
176
+ fabricks-3.0.3.dist-info/RECORD,,