fabricks 3.0.5.2__py3-none-any.whl → 3.0.6__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
Files changed (139)
  1. fabricks/api/__init__.py +2 -0
  2. fabricks/api/context.py +1 -2
  3. fabricks/api/deploy.py +3 -0
  4. fabricks/api/job_schema.py +2 -2
  5. fabricks/api/masks.py +3 -0
  6. fabricks/api/notebooks/initialize.py +2 -2
  7. fabricks/api/notebooks/process.py +2 -2
  8. fabricks/api/notebooks/run.py +2 -2
  9. fabricks/api/notebooks/schedule.py +75 -0
  10. fabricks/api/notebooks/terminate.py +2 -2
  11. fabricks/api/schedules.py +2 -16
  12. fabricks/cdc/__init__.py +2 -2
  13. fabricks/cdc/base/__init__.py +2 -2
  14. fabricks/cdc/base/_types.py +9 -2
  15. fabricks/cdc/base/configurator.py +86 -41
  16. fabricks/cdc/base/generator.py +44 -35
  17. fabricks/cdc/base/merger.py +16 -14
  18. fabricks/cdc/base/processor.py +232 -144
  19. fabricks/cdc/nocdc.py +8 -7
  20. fabricks/cdc/templates/{query → ctes}/base.sql.jinja +7 -6
  21. fabricks/cdc/templates/ctes/current.sql.jinja +28 -0
  22. fabricks/cdc/templates/ctes/deduplicate_hash.sql.jinja +32 -0
  23. fabricks/cdc/templates/ctes/deduplicate_key.sql.jinja +31 -0
  24. fabricks/cdc/templates/{query → ctes}/rectify.sql.jinja +4 -22
  25. fabricks/cdc/templates/ctes/slice.sql.jinja +1 -0
  26. fabricks/cdc/templates/filter.sql.jinja +4 -4
  27. fabricks/cdc/templates/macros/bactick.sql.jinja +1 -0
  28. fabricks/cdc/templates/macros/hash.sql.jinja +18 -0
  29. fabricks/cdc/templates/merge.sql.jinja +3 -2
  30. fabricks/cdc/templates/merges/nocdc.sql.jinja +41 -0
  31. fabricks/cdc/templates/queries/context.sql.jinja +186 -0
  32. fabricks/cdc/templates/{query/nocdc.sql.jinja → queries/nocdc/complete.sql.jinja} +1 -1
  33. fabricks/cdc/templates/queries/nocdc/update.sql.jinja +35 -0
  34. fabricks/cdc/templates/{query → queries}/scd1.sql.jinja +2 -28
  35. fabricks/cdc/templates/{query → queries}/scd2.sql.jinja +29 -48
  36. fabricks/cdc/templates/query.sql.jinja +15 -11
  37. fabricks/context/__init__.py +18 -4
  38. fabricks/context/_types.py +2 -0
  39. fabricks/context/config/__init__.py +92 -0
  40. fabricks/context/config/utils.py +53 -0
  41. fabricks/context/log.py +8 -2
  42. fabricks/context/runtime.py +87 -263
  43. fabricks/context/secret.py +1 -1
  44. fabricks/context/spark_session.py +1 -1
  45. fabricks/context/utils.py +76 -0
  46. fabricks/core/dags/generator.py +6 -7
  47. fabricks/core/dags/log.py +2 -15
  48. fabricks/core/dags/processor.py +11 -11
  49. fabricks/core/dags/utils.py +15 -1
  50. fabricks/core/{scripts/job_schema.py → job_schema.py} +4 -0
  51. fabricks/core/jobs/base/_types.py +64 -22
  52. fabricks/core/jobs/base/checker.py +13 -12
  53. fabricks/core/jobs/base/configurator.py +41 -67
  54. fabricks/core/jobs/base/generator.py +55 -24
  55. fabricks/core/jobs/base/invoker.py +54 -30
  56. fabricks/core/jobs/base/processor.py +43 -26
  57. fabricks/core/jobs/bronze.py +45 -38
  58. fabricks/core/jobs/get_jobs.py +2 -2
  59. fabricks/core/jobs/get_schedule.py +10 -0
  60. fabricks/core/jobs/get_schedules.py +32 -0
  61. fabricks/core/jobs/gold.py +61 -48
  62. fabricks/core/jobs/silver.py +39 -40
  63. fabricks/core/masks.py +52 -0
  64. fabricks/core/parsers/base.py +2 -2
  65. fabricks/core/schedules/__init__.py +14 -0
  66. fabricks/core/schedules/diagrams.py +46 -0
  67. fabricks/core/schedules/get_schedule.py +5 -0
  68. fabricks/core/schedules/get_schedules.py +9 -0
  69. fabricks/core/schedules/run.py +3 -0
  70. fabricks/core/schedules/views.py +61 -0
  71. fabricks/core/steps/base.py +110 -72
  72. fabricks/core/udfs.py +12 -23
  73. fabricks/core/views.py +20 -13
  74. fabricks/deploy/__init__.py +97 -0
  75. fabricks/deploy/masks.py +8 -0
  76. fabricks/deploy/notebooks.py +71 -0
  77. fabricks/deploy/schedules.py +8 -0
  78. fabricks/{core/deploy → deploy}/tables.py +16 -13
  79. fabricks/{core/deploy → deploy}/udfs.py +3 -1
  80. fabricks/deploy/utils.py +36 -0
  81. fabricks/{core/deploy → deploy}/views.py +5 -9
  82. fabricks/metastore/database.py +3 -3
  83. fabricks/metastore/dbobject.py +4 -4
  84. fabricks/metastore/table.py +157 -88
  85. fabricks/metastore/view.py +13 -6
  86. fabricks/utils/_types.py +6 -0
  87. fabricks/utils/azure_table.py +4 -3
  88. fabricks/utils/helpers.py +141 -11
  89. fabricks/utils/log.py +29 -18
  90. fabricks/utils/read/_types.py +1 -1
  91. fabricks/utils/schema/get_schema_for_type.py +6 -0
  92. fabricks/utils/write/delta.py +3 -3
  93. {fabricks-3.0.5.2.dist-info → fabricks-3.0.6.dist-info}/METADATA +2 -1
  94. fabricks-3.0.6.dist-info/RECORD +175 -0
  95. fabricks/api/notebooks/add_fabricks.py +0 -13
  96. fabricks/api/notebooks/optimize.py +0 -29
  97. fabricks/api/notebooks/vacuum.py +0 -29
  98. fabricks/cdc/templates/query/context.sql.jinja +0 -101
  99. fabricks/cdc/templates/query/current.sql.jinja +0 -32
  100. fabricks/cdc/templates/query/deduplicate_hash.sql.jinja +0 -21
  101. fabricks/cdc/templates/query/deduplicate_key.sql.jinja +0 -14
  102. fabricks/cdc/templates/query/hash.sql.jinja +0 -1
  103. fabricks/cdc/templates/query/slice.sql.jinja +0 -14
  104. fabricks/config/__init__.py +0 -0
  105. fabricks/config/base.py +0 -8
  106. fabricks/config/fabricks/__init__.py +0 -26
  107. fabricks/config/fabricks/base.py +0 -90
  108. fabricks/config/fabricks/environment.py +0 -9
  109. fabricks/config/fabricks/pyproject.py +0 -47
  110. fabricks/config/jobs/__init__.py +0 -6
  111. fabricks/config/jobs/base.py +0 -101
  112. fabricks/config/jobs/bronze.py +0 -38
  113. fabricks/config/jobs/gold.py +0 -27
  114. fabricks/config/jobs/silver.py +0 -22
  115. fabricks/config/runtime.py +0 -67
  116. fabricks/config/steps/__init__.py +0 -6
  117. fabricks/config/steps/base.py +0 -50
  118. fabricks/config/steps/bronze.py +0 -7
  119. fabricks/config/steps/gold.py +0 -14
  120. fabricks/config/steps/silver.py +0 -15
  121. fabricks/core/deploy/__init__.py +0 -17
  122. fabricks/core/schedules.py +0 -142
  123. fabricks/core/scripts/__init__.py +0 -9
  124. fabricks/core/scripts/armageddon.py +0 -87
  125. fabricks/core/scripts/stats.py +0 -51
  126. fabricks/core/scripts/steps.py +0 -26
  127. fabricks-3.0.5.2.dist-info/RECORD +0 -177
  128. /fabricks/cdc/templates/{filter → filters}/final.sql.jinja +0 -0
  129. /fabricks/cdc/templates/{filter → filters}/latest.sql.jinja +0 -0
  130. /fabricks/cdc/templates/{filter → filters}/update.sql.jinja +0 -0
  131. /fabricks/cdc/templates/{merge → merges}/scd1.sql.jinja +0 -0
  132. /fabricks/cdc/templates/{merge → merges}/scd2.sql.jinja +0 -0
  133. /fabricks/cdc/templates/{query → queries}/__init__.py +0 -0
  134. /fabricks/cdc/templates/{query → queries}/final.sql.jinja +0 -0
  135. /fabricks/core/{utils.py → parsers/utils.py} +0 -0
  136. /fabricks/core/{scripts → schedules}/generate.py +0 -0
  137. /fabricks/core/{scripts → schedules}/process.py +0 -0
  138. /fabricks/core/{scripts → schedules}/terminate.py +0 -0
  139. {fabricks-3.0.5.2.dist-info → fabricks-3.0.6.dist-info}/WHEEL +0 -0
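The most visible API change in this release shows up in the fabricks/metastore/table.py diff below: Table.create (and the internal _create) now also accept masks, primary_key, foreign_keys and comments. As a minimal, hedged sketch of how these options might be passed, based only on the diff below (the table instance, DataFrame, mask function, constraint names and referenced table are illustrative placeholders, not taken from the package):

table.create(  # `table` is an existing fabricks.metastore.table.Table; `customer_df` carries the target schema
    df=customer_df,
    liquid_clustering=True,
    cluster_by=["customer_id"],
    comments={"customer_id": "business key"},              # emitted as a per-column comment in the generated DDL
    masks={"email": "email_mask"},                          # emitted as `mask email_mask` on the column
    primary_key={"pk_customer": {"keys": "customer_id"}},   # exactly one constraint; keys may be a str or a list
    foreign_keys={"fk_country": {"keys": "country_id", "reference": "gold.country"}},
)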
@@ -1,5 +1,5 @@
  import re
- from typing import List, Optional, Sequence, Union, overload
+ from typing import Any, List, Optional, Sequence, Union, overload

  from delta import DeltaTable
  from pyspark.errors.exceptions.base import AnalysisException
@@ -38,25 +38,25 @@ class Table(DbObject):

  @property
  def dataframe(self) -> DataFrame:
- assert self.is_registered, f"{self} not registered"
+ assert self.registered, f"{self} not registered"

  return self.spark.sql(f"select * from {self}")

  @property
  def columns(self) -> List[str]:
- assert self.is_registered, f"{self} not registered"
+ assert self.registered, f"{self} not registered"

  return self.dataframe.columns

  @property
  def rows(self) -> int:
- assert self.is_registered, f"{self} not registered"
+ assert self.registered, f"{self} not registered"

  return self.spark.sql(f"select count(*) from {self}").collect()[0][0]

  @property
  def last_version(self) -> int:
- assert self.is_registered, f"{self} not registered"
+ assert self.registered, f"{self} not registered"

  df = self.describe_history()
  version = df.select(max("version")).collect()[0][0]
@@ -64,33 +64,33 @@ class Table(DbObject):

  @property
  def identity_enabled(self) -> bool:
- assert self.is_registered, f"{self} not registered"
+ assert self.registered, f"{self} not registered"
  return self.get_property("delta.feature.identityColumns") == "supported"

  @property
  def type_widening_enabled(self) -> bool:
- assert self.is_registered, f"{self} not registered"
+ assert self.registered, f"{self} not registered"
  return self.get_property("delta.enableTypeWidening") == "true"

  @property
  def liquid_clustering_enabled(self) -> bool:
- assert self.is_registered, f"{self} not registered"
+ assert self.registered, f"{self} not registered"
  return self.get_property("delta.feature.clustering") == "supported"

  @property
  def auto_liquid_clustering_enabled(self) -> bool:
- assert self.is_registered, f"{self} not registered"
+ assert self.registered, f"{self} not registered"
  return self.get_property("delta.clusterByAuto") == "true"

  @property
  def vorder_enabled(self) -> bool:
- assert self.is_registered, f"{self} not registered"
+ assert self.registered, f"{self} not registered"
  return self.get_property("delta.parquet.vorder.enabled") == "true"

  def drop(self):
  super().drop()
  if self.delta_path.exists():
- DEFAULT_LOGGER.debug("delete delta folder", extra={"job": self})
+ DEFAULT_LOGGER.debug("delete delta folder", extra={"label": self})
  self.delta_path.rm()

  @overload
@@ -104,6 +104,10 @@ class Table(DbObject):
  liquid_clustering: Optional[bool] = False,
  cluster_by: Optional[Union[List[str], str]] = None,
  properties: Optional[dict[str, str]] = None,
+ masks: Optional[dict[str, str]] = None,
+ primary_key: Optional[dict[str, Any]] = None,
+ foreign_keys: Optional[dict[str, Any]] = None,
+ comments: Optional[dict[str, str]] = None,
  ): ...

  @overload
@@ -117,6 +121,10 @@ class Table(DbObject):
  liquid_clustering: Optional[bool] = False,
  cluster_by: Optional[Union[List[str], str]] = None,
  properties: Optional[dict[str, str]] = None,
+ masks: Optional[dict[str, str]] = None,
+ primary_key: Optional[dict[str, Any]] = None,
+ foreign_keys: Optional[dict[str, Any]] = None,
+ comments: Optional[dict[str, str]] = None,
  ): ...

  def create(
@@ -129,6 +137,10 @@ class Table(DbObject):
  liquid_clustering: Optional[bool] = False,
  cluster_by: Optional[Union[List[str], str]] = None,
  properties: Optional[dict[str, str]] = None,
+ masks: Optional[dict[str, str]] = None,
+ primary_key: Optional[dict[str, Any]] = None,
+ foreign_keys: Optional[dict[str, Any]] = None,
+ comments: Optional[dict[str, str]] = None,
  ):
  self._create(
  df=df,
@@ -139,8 +151,41 @@ class Table(DbObject):
  liquid_clustering=liquid_clustering,
  cluster_by=cluster_by,
  properties=properties,
+ masks=masks,
+ primary_key=primary_key,
+ foreign_keys=foreign_keys,
+ comments=comments,
  )

+ def _get_ddl_columns(
+ self, df: DataFrame, masks: Optional[dict[str, str]], comments: Optional[dict[str, str]]
+ ) -> List[str]:
+ def _backtick(name: str, dtype: str) -> str:
+ j = df.schema[name].jsonValue()
+ r = re.compile(r"(?<='name': ')[^']+(?=',)")
+
+ names = re.findall(r, str(j))
+ for n in names:
+ escaped = re.escape(n)
+ dtype = re.sub(f"(?<=,){escaped}(?=:)|(?<=<){escaped}(?=:)", f"`{n}`", dtype)
+
+ return dtype
+
+ out = []
+
+ for name, dtype in df.dtypes:
+ col = [f"`{name}`", _backtick(name, dtype)]
+
+ if comments and name in comments:
+ col.append(f"comment '{comments[name]}'")
+
+ if masks and name in masks:
+ col.append(f"mask {masks[name]}")
+
+ out.append(" ".join(col))
+
+ return out
+
  def _create(
  self,
  df: Optional[DataFrame] = None,
@@ -151,26 +196,23 @@ class Table(DbObject):
  liquid_clustering: Optional[bool] = False,
  cluster_by: Optional[Union[List[str], str]] = None,
  properties: Optional[dict[str, str]] = None,
+ masks: Optional[dict[str, str]] = None,
+ primary_key: Optional[dict[str, Any]] = None,
+ foreign_keys: Optional[dict[str, Any]] = None,
+ comments: Optional[dict[str, str]] = None,
  ):
- DEFAULT_LOGGER.info("create table", extra={"job": self})
+ DEFAULT_LOGGER.info("create table", extra={"label": self})
  if not df:
  assert schema is not None
  df = self.spark.createDataFrame([], schema)

- def _backtick(name: str, dtype: str) -> str:
- j = df.schema[name].jsonValue()
- r = re.compile(r"(?<='name': ')[^']+(?=',)")
- names = re.findall(r, str(j))
- for n in names:
- escaped = re.escape(n)
- dtype = re.sub(f"(?<=,){escaped}(?=:)|(?<=<){escaped}(?=:)", f"`{n}`", dtype)
- return dtype
-
- ddl_columns = ",\n\t".join([f"`{name}` {_backtick(name, dtype)}" for name, dtype in df.dtypes])
+ ddl_columns = ",\n\t".join(self._get_ddl_columns(df, masks=masks, comments=comments))
  ddl_identity = "-- no identity" if "__identity" not in df.columns else ""
  ddl_cluster_by = "-- no cluster by"
  ddl_partition_by = "-- no partitioned by"
  ddl_tblproperties = "-- not tblproperties"
+ ddl_primary_key = "-- no primary key"
+ ddl_foreign_keys = "-- no foreign keys"

  if liquid_clustering:
  if cluster_by:
@@ -192,6 +234,29 @@ class Table(DbObject):
  if identity:
  ddl_identity = "__identity bigint generated by default as identity (start with 1 increment by 1), "

+ if primary_key:
+ assert len(primary_key) == 1, "only one primary key allowed"
+
+ for key, value in primary_key.items():
+ keys = value["keys"]
+ if isinstance(keys, str):
+ keys = [keys]
+ ddl_primary_key = f", constraint {key} primary key (" + ", ".join(keys) + ")"
+
+ if foreign_keys:
+ fks = []
+
+ for key, value in foreign_keys.items():
+ reference = value["reference"]
+ keys = value["keys"]
+ if isinstance(keys, str):
+ keys = [keys]
+ keys = ", ".join([f"`{k}`" for k in keys])
+ fk = f"constraint {key} foreign key ({keys}) references {reference}"
+ fks.append(fk)
+
+ ddl_foreign_keys = "," + ", ".join(fks)
+
  if not properties:
  special_char = False

@@ -218,6 +283,8 @@ class Table(DbObject):
  (
  {ddl_identity}
  {ddl_columns}
+ {ddl_foreign_keys}
+ {ddl_primary_key}
  )
  {ddl_tblproperties}
  {ddl_partition_by}
@@ -229,7 +296,7 @@ class Table(DbObject):
  except Exception:
  pass

- DEFAULT_LOGGER.debug("ddl", extra={"job": self, "sql": sql})
+ DEFAULT_LOGGER.debug("ddl", extra={"label": self, "sql": sql})
  self.spark.sql(sql)

  @property
@@ -238,38 +305,40 @@ class Table(DbObject):

  @property
  def column_mapping_enabled(self) -> bool:
- assert self.is_registered, f"{self} not registered"
+ assert self.registered, f"{self} not registered"

  return self.get_property("delta.columnMapping.mode") == "name"

  def exists(self) -> bool:
- return self.is_deltatable and self.is_registered
+ return self.is_deltatable and self.registered

  def register(self):
- DEFAULT_LOGGER.debug("register table", extra={"job": self})
+ DEFAULT_LOGGER.debug("register table", extra={"label": self})
  self.spark.sql(f"create table if not exists {self.qualified_name} using delta location '{self.delta_path}'")

  def restore_to_version(self, version: int):
- assert self.is_registered, f"{self} not registered"
+ assert self.registered, f"{self} not registered"

- DEFAULT_LOGGER.info(f"restore table to version {version}", extra={"job": self})
+ DEFAULT_LOGGER.info(f"restore table to version {version}", extra={"label": self})
  self.spark.sql(f"restore table {self.qualified_name} to version as of {version}")

  def truncate(self):
- assert self.is_registered, f"{self} not registered"
+ assert self.registered, f"{self} not registered"

- DEFAULT_LOGGER.warning("truncate table", extra={"job": self})
+ DEFAULT_LOGGER.warning("truncate table", extra={"label": self})
  self.create_restore_point()
  self.spark.sql(f"truncate table {self.qualified_name}")

  def schema_drifted(self, df: DataFrame, exclude_columns_with_prefix: Optional[str] = None) -> bool:
- assert self.is_registered, f"{self} not registered"
+ assert self.registered, f"{self} not registered"

  diffs = self.get_schema_differences(df)
  return len(diffs) > 0

  def get_schema_differences(self, df: DataFrame) -> Sequence[SchemaDiff]:
- assert self.is_registered, f"{self} not registered"
+ assert self.registered, f"{self} not registered"
+
+ DEFAULT_LOGGER.debug("get schema differences", extra={"label": self, "df": df})

  df1 = self.dataframe
  if self.identity_enabled:
@@ -305,12 +374,12 @@ class Table(DbObject):
  )

  if diffs:
- DEFAULT_LOGGER.debug("difference(s) with delta table", extra={"job": self, "df": df})
+ DEFAULT_LOGGER.warning("difference(s) with delta table", extra={"label": self, "df": df})

  return diffs

  def update_schema(self, df: DataFrame, widen_types: bool = False):
- assert self.is_registered, f"{self} not registered"
+ assert self.registered, f"{self} not registered"
  if not self.column_mapping_enabled:
  self.enable_column_mapping()

@@ -323,7 +392,7 @@ class Table(DbObject):
  msg = "update schema"

  if diffs:
- DEFAULT_LOGGER.info(msg, extra={"job": self, "df": diffs})
+ DEFAULT_LOGGER.info(msg, extra={"label": self, "df": diffs})

  for row in diffs:
  if row.status == "changed":
@@ -333,7 +402,7 @@ class Table(DbObject):

  DEFAULT_LOGGER.debug(
  f"{row.status.replace('ed', 'ing')} {row.column} ({data_type})",
- extra={"job": self},
+ extra={"label": self},
  )

  try:
@@ -360,7 +429,7 @@ class Table(DbObject):
  pass

  def overwrite_schema(self, df: DataFrame):
- assert self.is_registered, f"{self} not registered"
+ assert self.registered, f"{self} not registered"
  if not self.column_mapping_enabled:
  self.enable_column_mapping()

@@ -371,7 +440,7 @@ class Table(DbObject):

  diffs = self.get_schema_differences(df)
  if diffs:
- DEFAULT_LOGGER.warning("overwrite schema", extra={"job": self, "df": diffs})
+ DEFAULT_LOGGER.warning("overwrite schema", extra={"label": self, "df": diffs})

  for row in diffs:
  if row.status == "added":
@@ -391,9 +460,9 @@ class Table(DbObject):
  self.add_column(row.column, row.new_data_type)

  def vacuum(self, retention_days: int = 7):
- assert self.is_registered, f"{self} not registered"
+ assert self.registered, f"{self} not registered"

- DEFAULT_LOGGER.debug(f"vacuum table (removing files older than {retention_days} days)", extra={"job": self})
+ DEFAULT_LOGGER.debug(f"vacuum table (removing files older than {retention_days} days)", extra={"label": self})
  self.spark.sql("SET self.spark.databricks.delta.retentionDurationCheck.enabled = False")
  try:
  self.create_restore_point()
@@ -405,9 +474,9 @@ class Table(DbObject):
  self.spark.sql("SET self.spark.databricks.delta.retentionDurationCheck.enabled = True")

  def optimize(self, columns: Optional[Union[str, List[str]]] = None):
- assert self.is_registered, f"{self} not registered"
+ assert self.registered, f"{self} not registered"

- DEFAULT_LOGGER.info("optimize", extra={"job": self})
+ DEFAULT_LOGGER.info("optimize", extra={"label": self})

  if self.liquid_clustering_enabled:
  self.spark.sql(f"optimize {self.qualified_name}")
@@ -417,7 +486,7 @@ class Table(DbObject):

  elif columns is None:
  if self.vorder_enabled:
- DEFAULT_LOGGER.debug("vorder", extra={"job": self})
+ DEFAULT_LOGGER.debug("vorder", extra={"label": self})
  self.spark.sql(f"optimize {self.qualified_name} vorder")
  else:
  self.spark.sql(f"optimize {self.qualified_name}")
@@ -429,24 +498,24 @@ class Table(DbObject):
  cols = ", ".join(columns)

  if self.vorder_enabled:
- DEFAULT_LOGGER.debug(f"zorder by {cols} vorder", extra={"job": self})
+ DEFAULT_LOGGER.debug(f"zorder by {cols} vorder", extra={"label": self})
  self.spark.sql(f"optimize {self.qualified_name} zorder by ({cols}) vorder")

  else:
- DEFAULT_LOGGER.debug(f"zorder by {cols}", extra={"job": self})
+ DEFAULT_LOGGER.debug(f"zorder by {cols}", extra={"label": self})
  self.spark.sql(f"optimize {self.qualified_name} zorder by ({cols})")

  def analyze(self):
- assert self.is_registered, f"{self} not registered"
+ assert self.registered, f"{self} not registered"

- DEFAULT_LOGGER.debug("analyze", extra={"job": self})
+ DEFAULT_LOGGER.debug("analyze", extra={"label": self})
  self.compute_statistics()
  self.compute_delta_statistics()

  def compute_statistics(self):
- assert self.is_registered, f"{self} not registered"
+ assert self.registered, f"{self} not registered"

- DEFAULT_LOGGER.debug("compute statistics", extra={"job": self})
+ DEFAULT_LOGGER.debug("compute statistics", extra={"label": self})
  cols = [
  f"`{name}`"
  for name, dtype in self.dataframe.dtypes
@@ -460,16 +529,16 @@ class Table(DbObject):
  self.spark.sql(f"analyze table {self.qualified_name} compute statistics for columns {cols}")

  def compute_delta_statistics(self):
- assert self.is_registered, f"{self} not registered"
+ assert self.registered, f"{self} not registered"

- DEFAULT_LOGGER.debug("compute delta statistics", extra={"job": self})
+ DEFAULT_LOGGER.debug("compute delta statistics", extra={"label": self})
  self.spark.sql(f"analyze table {self.qualified_name} compute delta statistics")

  def drop_column(self, name: str):
- assert self.is_registered, f"{self} not registered"
+ assert self.registered, f"{self} not registered"
  assert self.column_mapping_enabled, "column mapping not enabled"

- DEFAULT_LOGGER.warning(f"drop column {name}", extra={"job": self})
+ DEFAULT_LOGGER.warning(f"drop column {name}", extra={"label": self})
  self.spark.sql(
  f"""
  alter table {self.qualified_name}
@@ -478,10 +547,10 @@ class Table(DbObject):
  )

  def change_column(self, name: str, type: str):
- assert self.is_registered, f"{self} not registered"
+ assert self.registered, f"{self} not registered"
  assert self.column_mapping_enabled, "column mapping not enabled"

- DEFAULT_LOGGER.info(f"change column {name} ({type})", extra={"job": self})
+ DEFAULT_LOGGER.info(f"change column {name} ({type})", extra={"label": self})
  self.spark.sql(
  f"""
  alter table {self.qualified_name}
@@ -490,10 +559,10 @@ class Table(DbObject):
  )

  def rename_column(self, old: str, new: str):
- assert self.is_registered, f"{self} not registered"
+ assert self.registered, f"{self} not registered"
  assert self.column_mapping_enabled, "column mapping not enabled"

- DEFAULT_LOGGER.info(f"rename column {old} -> {new}", extra={"job": self})
+ DEFAULT_LOGGER.info(f"rename column {old} -> {new}", extra={"label": self})
  self.spark.sql(
  f"""
  alter table {self.qualified_name}
@@ -506,35 +575,35 @@ class Table(DbObject):
  return data_type

  def get_details(self) -> DataFrame:
- assert self.is_registered, f"{self} not registered"
+ assert self.registered, f"{self} not registered"

  return self.spark.sql(f"describe detail {self.qualified_name}")

  def get_properties(self) -> DataFrame:
- assert self.is_registered, f"{self} not registered"
+ assert self.registered, f"{self} not registered"

  return self.spark.sql(f"show tblproperties {self.qualified_name}")

  def get_description(self) -> DataFrame:
- assert self.is_registered, f"{self} not registered"
+ assert self.registered, f"{self} not registered"

  return self.spark.sql(f"describe extended {self.qualified_name}")

  def get_history(self) -> DataFrame:
- assert self.is_registered, f"{self} not registered"
+ assert self.registered, f"{self} not registered"

  df = self.spark.sql(f"describe history {self.qualified_name}")
  return df

  def get_last_version(self) -> int:
- assert self.is_registered, f"{self} not registered"
+ assert self.registered, f"{self} not registered"

  df = self.get_history()
  version = df.select(max("version")).collect()[0][0]
  return version

  def get_property(self, key: str) -> Optional[str]:
- assert self.is_registered, f"{self} not registered"
+ assert self.registered, f"{self} not registered"

  try:
  value = self.get_properties().where(f"key == '{key}'").select("value").collect()[0][0]
@@ -544,15 +613,15 @@ class Table(DbObject):
  return None

  def enable_change_data_feed(self):
- assert self.is_registered, f"{self} not registered"
+ assert self.registered, f"{self} not registered"

- DEFAULT_LOGGER.debug("enable change data feed", extra={"job": self})
+ DEFAULT_LOGGER.debug("enable change data feed", extra={"label": self})
  self.set_property("delta.enableChangeDataFeed", "true")

  def enable_column_mapping(self):
- assert self.is_registered, f"{self} not registered"
+ assert self.registered, f"{self} not registered"

- DEFAULT_LOGGER.debug("enable column mapping", extra={"job": self})
+ DEFAULT_LOGGER.debug("enable column mapping", extra={"label": self})

  try:
  self.spark.sql(
@@ -563,7 +632,7 @@ class Table(DbObject):
  )

  except Exception:
- DEFAULT_LOGGER.debug("update reader and writer version", extra={"job": self})
+ DEFAULT_LOGGER.debug("update reader and writer version", extra={"label": self})
  self.spark.sql(
  f"""
  alter table {self.qualified_name}
@@ -576,9 +645,9 @@ class Table(DbObject):
  )

  def set_property(self, key: Union[str, int], value: Union[str, int]):
- assert self.is_registered, f"{self} not registered"
+ assert self.registered, f"{self} not registered"

- DEFAULT_LOGGER.debug(f"set property {key} = {value}", extra={"job": self})
+ DEFAULT_LOGGER.debug(f"set property {key} = {value}", extra={"label": self})
  self.spark.sql(
  f"""
  alter table {self.qualified_name}
@@ -587,9 +656,9 @@ class Table(DbObject):
  )

  def add_constraint(self, name: str, expr: str):
- assert self.is_registered, f"{self} not registered"
+ assert self.registered, f"{self} not registered"

- DEFAULT_LOGGER.debug(f"add constraint ({name} check ({expr}))", extra={"job": self})
+ DEFAULT_LOGGER.debug(f"add constraint ({name} check ({expr}))", extra={"label": self})
  self.spark.sql(
  f"""
  alter table {self.qualified_name}
@@ -598,9 +667,9 @@ class Table(DbObject):
  )

  def add_comment(self, comment: str):
- assert self.is_registered, f"{self} not registered"
+ assert self.registered, f"{self} not registered"

- DEFAULT_LOGGER.debug(f"add comment '{comment}'", extra={"job": self})
+ DEFAULT_LOGGER.debug(f"add comment '{comment}'", extra={"label": self})
  self.spark.sql(
  f"""
  comment on table {self.qualified_name}
@@ -609,10 +678,10 @@ class Table(DbObject):
  )

  def add_materialized_column(self, name: str, expr: str, type: str):
- assert self.is_registered, f"{self} not registered"
+ assert self.registered, f"{self} not registered"
  assert self.column_mapping_enabled, "column mapping not enabled"

- DEFAULT_LOGGER.info(f"add materialized column ({name} {type})", extra={"job": self})
+ DEFAULT_LOGGER.info(f"add materialized column ({name} {type})", extra={"label": self})
  self.spark.sql(
  f""""
  alter table {self.qualified_name}
@@ -621,9 +690,9 @@ class Table(DbObject):
  )

  def add_column(self, name: str, type: str, after: Optional[str] = None):
- assert self.is_registered, f"{self} not registered"
+ assert self.registered, f"{self} not registered"

- DEFAULT_LOGGER.info(f"add column {name} ({type})", extra={"job": self})
+ DEFAULT_LOGGER.info(f"add column {name} ({type})", extra={"label": self})
  ddl_after = "" if not after else f"after {after}"
  self.spark.sql(
  f"""
@@ -633,14 +702,14 @@ class Table(DbObject):
  )

  def create_bloomfilter_index(self, columns: Union[str, List[str]]):
- assert self.is_registered, f"{self} not registered"
+ assert self.registered, f"{self} not registered"

  if isinstance(columns, str):
  columns = [columns]
  columns = [f"`{c}`" for c in columns]
  cols = ", ".join(columns)

- DEFAULT_LOGGER.info(f"bloomfilter by {cols}", extra={"job": self})
+ DEFAULT_LOGGER.info(f"bloomfilter by {cols}", extra={"label": self})
  self.spark.sql(
  f"""
  create bloomfilter index on table {self.qualified_name}
@@ -649,37 +718,37 @@ class Table(DbObject):
  )

  def create_restore_point(self):
- assert self.is_registered, f"{self} not registered"
+ assert self.registered, f"{self} not registered"

  last_version = self.get_last_version() + 1
  self.set_property("fabricks.last_version", last_version)

  def show_properties(self) -> DataFrame:
- assert self.is_registered, f"{self} not registered"
+ assert self.registered, f"{self} not registered"

  return self.spark.sql(f"show tblproperties {self.qualified_name}")

  def describe_detail(self) -> DataFrame:
- assert self.is_registered, f"{self} not registered"
+ assert self.registered, f"{self} not registered"

  return self.spark.sql(f"describe detail {self.qualified_name}")

  def describe_extended(self) -> DataFrame:
- assert self.is_registered, f"{self} not registered"
+ assert self.registered, f"{self} not registered"

  return self.spark.sql(f"describe extended {self.qualified_name}")

  def describe_history(self) -> DataFrame:
- assert self.is_registered, f"{self} not registered"
+ assert self.registered, f"{self} not registered"

  df = self.spark.sql(f"describe history {self.qualified_name}")
  return df

  def enable_liquid_clustering(self, columns: Optional[Union[str, List[str]]] = None, auto: Optional[bool] = False):
- assert self.is_registered, f"{self} not registered"
+ assert self.registered, f"{self} not registered"

  if auto:
- DEFAULT_LOGGER.info("cluster by auto", extra={"job": self})
+ DEFAULT_LOGGER.info("cluster by auto", extra={"label": self})
  self.spark.sql(f"alter table {self.qualified_name} cluster by automatic")

  else:
@@ -690,7 +759,7 @@ class Table(DbObject):
  columns = [f"`{c}`" for c in columns]
  cols = ", ".join(columns)

- DEFAULT_LOGGER.info(f"cluster by {cols}", extra={"job": self})
+ DEFAULT_LOGGER.info(f"cluster by {cols}", extra={"label": self})
  self.spark.sql(
  f"""
  alter table {self.qualified_name}