fabricks-3.0.19-py3-none-any.whl → fabricks-4.0.1-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in those registries.
Files changed (95)
  1. fabricks/api/context.py +15 -3
  2. fabricks/api/notebooks/schedule.py +2 -3
  3. fabricks/api/parsers.py +2 -1
  4. fabricks/api/utils.py +3 -1
  5. fabricks/cdc/__init__.py +1 -2
  6. fabricks/cdc/base/__init__.py +1 -2
  7. fabricks/cdc/base/_types.py +5 -3
  8. fabricks/cdc/base/configurator.py +5 -0
  9. fabricks/cdc/base/generator.py +7 -3
  10. fabricks/cdc/base/merger.py +2 -0
  11. fabricks/cdc/base/processor.py +15 -0
  12. fabricks/cdc/templates/README.md +490 -0
  13. fabricks/cdc/templates/ctes/base.sql.jinja +1 -0
  14. fabricks/cdc/templates/ctes/current.sql.jinja +4 -0
  15. fabricks/cdc/templates/merges/scd1.sql.jinja +6 -0
  16. fabricks/cdc/templates/merges/scd2.sql.jinja +6 -0
  17. fabricks/cdc/templates/queries/context.sql.jinja +104 -96
  18. fabricks/cdc/templates/query.sql.jinja +1 -1
  19. fabricks/context/__init__.py +13 -1
  20. fabricks/context/config.py +13 -122
  21. fabricks/context/log.py +92 -1
  22. fabricks/context/runtime.py +35 -69
  23. fabricks/context/spark_session.py +4 -4
  24. fabricks/context/utils.py +26 -39
  25. fabricks/core/__init__.py +2 -2
  26. fabricks/core/dags/base.py +5 -5
  27. fabricks/core/dags/processor.py +2 -3
  28. fabricks/core/extenders.py +1 -1
  29. fabricks/core/job_schema.py +26 -16
  30. fabricks/core/jobs/__init__.py +1 -7
  31. fabricks/core/jobs/base/README.md +1545 -0
  32. fabricks/core/jobs/base/__init__.py +1 -8
  33. fabricks/core/jobs/base/checker.py +7 -7
  34. fabricks/core/jobs/base/configurator.py +142 -63
  35. fabricks/core/jobs/base/generator.py +38 -34
  36. fabricks/core/jobs/base/invoker.py +48 -63
  37. fabricks/core/jobs/base/processor.py +13 -28
  38. fabricks/core/jobs/bronze.py +88 -38
  39. fabricks/core/jobs/get_job.py +3 -6
  40. fabricks/core/jobs/get_job_conf.py +19 -68
  41. fabricks/core/jobs/get_jobs.py +10 -11
  42. fabricks/core/jobs/get_schedules.py +3 -17
  43. fabricks/core/jobs/gold.py +89 -47
  44. fabricks/core/jobs/silver.py +42 -22
  45. fabricks/core/masks.py +11 -8
  46. fabricks/core/parsers/__init__.py +0 -2
  47. fabricks/core/parsers/base.py +10 -10
  48. fabricks/core/parsers/decorator.py +1 -1
  49. fabricks/core/parsers/get_parser.py +4 -5
  50. fabricks/core/schedules/process.py +1 -4
  51. fabricks/core/steps/base.py +27 -17
  52. fabricks/core/steps/get_step.py +2 -4
  53. fabricks/core/steps/get_step_conf.py +3 -7
  54. fabricks/core/udfs.py +7 -7
  55. fabricks/core/views.py +2 -2
  56. fabricks/deploy/__init__.py +27 -16
  57. fabricks/deploy/masks.py +1 -1
  58. fabricks/deploy/notebooks.py +19 -16
  59. fabricks/deploy/schedules.py +1 -1
  60. fabricks/deploy/tables.py +66 -49
  61. fabricks/deploy/udfs.py +2 -2
  62. fabricks/deploy/views.py +15 -16
  63. fabricks/metastore/database.py +3 -3
  64. fabricks/metastore/table.py +103 -68
  65. fabricks/models/__init__.py +125 -0
  66. fabricks/models/common.py +79 -0
  67. fabricks/models/config.py +225 -0
  68. fabricks/models/dependency.py +50 -0
  69. fabricks/models/job.py +157 -0
  70. fabricks/models/path.py +17 -0
  71. fabricks/models/runtime.py +182 -0
  72. fabricks/models/schedule.py +21 -0
  73. fabricks/models/step.py +103 -0
  74. fabricks/models/table.py +77 -0
  75. fabricks/{core/jobs/get_job_id.py → models/utils.py} +2 -0
  76. fabricks/utils/helpers.py +6 -5
  77. fabricks/utils/log.py +25 -6
  78. fabricks/utils/path.py +265 -108
  79. fabricks/utils/pip.py +7 -7
  80. fabricks/utils/read/read.py +23 -22
  81. fabricks/utils/read/read_yaml.py +2 -2
  82. fabricks/utils/write/delta.py +4 -4
  83. fabricks/utils/write/stream.py +2 -2
  84. {fabricks-3.0.19.dist-info → fabricks-4.0.1.dist-info}/METADATA +9 -4
  85. {fabricks-3.0.19.dist-info → fabricks-4.0.1.dist-info}/RECORD +86 -83
  86. fabricks/context/_types.py +0 -139
  87. fabricks/context/helpers.py +0 -63
  88. fabricks/core/jobs/base/_types.py +0 -284
  89. fabricks/core/parsers/_types.py +0 -6
  90. fabricks/utils/fdict.py +0 -240
  91. fabricks/utils/pydantic.py +0 -94
  92. fabricks/utils/schema/__init__.py +0 -7
  93. fabricks/utils/schema/get_json_schema_for_type.py +0 -161
  94. fabricks/utils/schema/get_schema_for_type.py +0 -99
  95. {fabricks-3.0.19.dist-info → fabricks-4.0.1.dist-info}/WHEEL +0 -0
fabricks/core/jobs/base/__init__.py
@@ -1,10 +1,3 @@
- from fabricks.core.jobs.base._types import Bronzes, Golds, Silvers, Steps
  from fabricks.core.jobs.base.job import BaseJob
 
- __all__ = [
-     "BaseJob",
-     "Bronzes",
-     "Golds",
-     "Silvers",
-     "Steps",
- ]
+ __all__ = ["BaseJob"]
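
Note that fabricks/core/jobs/base/_types.py is gone in 4.0.1 (see file 88 above), so the step aliases are no longer re-exported here; only BaseJob survives. A hedged sketch of the downstream impact, with the assumed new home of the aliases left commented out:

    # Still valid in 4.0.1:
    from fabricks.core.jobs.base import BaseJob

    # Bronzes, Golds, Silvers and Steps were removed along with _types.py.
    # Assumption: they now live in the new fabricks.models package added in
    # this release -- verify against fabricks/models/__init__.py before use.
    # from fabricks.models import Bronzes, Golds, Silvers, Steps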
fabricks/core/jobs/base/checker.py
@@ -19,10 +19,10 @@ class Checker(Generator):
          self._check("post_run")
 
      def _check(self, position: Literal["pre_run", "post_run"]):
-         if self.options.check.get(position):
+         if self.check_options and getattr(self.check_options, position):
              DEFAULT_LOGGER.debug(f"check {position}", extra={"label": self})
 
-             p = self.paths.runtime.append(f".{position}.sql")
+             p = self.paths.to_runtime.append(f".{position}.sql")
              assert p.exists(), f"{position} check not found ({p})"
 
              df = self.spark.sql(p.get_sql())
@@ -54,9 +54,9 @@ class Checker(Generator):
              raise PostRunCheckWarning(row["__message"], dataframe=df)
 
      def check_post_run_extra(self):
-         min_rows = self.options.check.get("min_rows")
-         max_rows = self.options.check.get("max_rows")
-         count_must_equal = self.options.check.get("count_must_equal")
+         min_rows = self.check_options.min_rows if self.check_options else None
+         max_rows = self.check_options.max_rows if self.check_options else None
+         count_must_equal = self.check_options.count_must_equal if self.check_options else None
 
          if min_rows or max_rows or count_must_equal:
              df = self.spark.sql(f"select count(*) from {self}")
@@ -121,10 +121,10 @@ class Checker(Generator):
          self._check_duplicate_in_column("__identity")
 
      def check_skip_run(self):
-         if self.options.check.get("skip"):
+         if self.check_options and self.check_options.skip:
              DEFAULT_LOGGER.debug("check if run should be skipped", extra={"label": self})
 
-             p = self.paths.runtime.append(".skip.sql")
+             p = self.paths.to_runtime.append(".skip.sql")
              assert p.exists(), "skip check not found"
 
              df = self.spark.sql(p.get_sql())
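
The three hunks above all trade dict-style FDict lookups for attribute access on an optional typed model, which is why every read now guards on self.check_options first. A minimal sketch of the pattern, using a dataclass as a stand-in for the real CheckOptions model in fabricks.models (field types are assumptions):

    from dataclasses import dataclass
    from typing import Optional

    @dataclass
    class CheckOptions:
        # field names appear in this diff; the types are guesses
        pre_run: Optional[bool] = None
        post_run: Optional[bool] = None
        skip: Optional[bool] = None
        min_rows: Optional[int] = None
        max_rows: Optional[int] = None
        count_must_equal: Optional[int] = None

    # 3.x read an untyped dict wrapper: self.options.check.get("min_rows")
    # 4.x: the whole options object may be None, hence the guard:
    check_options: Optional[CheckOptions] = None
    min_rows = check_options.min_rows if check_options else None
    assert min_rows is None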
fabricks/core/jobs/base/configurator.py
@@ -1,41 +1,58 @@
  from abc import ABC, abstractmethod
- from functools import lru_cache
- from typing import Optional, Union, cast
+ from typing import List, Optional, Union, cast
 
  from pyspark.sql import DataFrame, SparkSession
  from pyspark.sql.types import Row
  from typing_extensions import deprecated
 
- from fabricks.cdc import SCD1, SCD2, AllowedChangeDataCaptures, NoCDC
- from fabricks.context import CONF_RUNTIME, PATHS_RUNTIME, PATHS_STORAGE, STEPS
+ from fabricks.cdc import SCD1, SCD2, NoCDC
+ from fabricks.context import PATHS_RUNTIME, PATHS_STORAGE, STEPS
  from fabricks.context.log import DEFAULT_LOGGER
  from fabricks.context.spark_session import build_spark_session
- from fabricks.core.jobs.base._types import AllowedModes, Options, Paths, TStep
  from fabricks.core.jobs.get_job_conf import get_job_conf
- from fabricks.core.jobs.get_job_id import get_job_id
  from fabricks.metastore.table import Table
- from fabricks.utils.fdict import FDict
- from fabricks.utils.path import Path
+ from fabricks.models import (
+     AllowedChangeDataCaptures,
+     AllowedModes,
+     CheckOptions,
+     ExtenderOptions,
+     InvokerOptions,
+     Paths,
+     RuntimeOptions,
+     SparkOptions,
+     StepBronzeConf,
+     StepBronzeOptions,
+     StepGoldConf,
+     StepGoldOptions,
+     StepSilverConf,
+     StepSilverOptions,
+     StepTableOptions,
+     TableOptions,
+     TOptions,
+     get_job_id,
+ )
+ from fabricks.models.runtime import RuntimeConf
 
 
  class Configurator(ABC):
      def __init__(
          self,
          expand: str,
-         step: TStep,
+         step: str,
          topic: Optional[str] = None,
          item: Optional[str] = None,
          job_id: Optional[str] = None,
          conf: Optional[Union[dict, Row]] = None,
      ):
          self.expand = expand
-         self.step: TStep = step
+         self.step = step
 
          if job_id is not None:
              self.job_id = job_id
              self.conf = get_job_conf(step=self.step, job_id=self.job_id, row=conf)
              self.topic = self.conf.topic
              self.item = self.conf.item
+ 
          else:
              assert topic
              assert item
@@ -44,13 +61,15 @@ class Configurator(ABC):
              self.conf = get_job_conf(step=self.step, topic=self.topic, item=self.item, row=conf)
              self.job_id = get_job_id(step=self.step, topic=self.topic, item=self.item)
 
-     _step_conf: Optional[dict[str, str]] = None
+     _step_conf: Optional[Union[StepBronzeConf, StepSilverConf, StepGoldConf]] = None
+     _step_options: Optional[Union[StepBronzeOptions, StepSilverOptions, StepGoldOptions]] = None
+     _step_table_options: Optional[StepTableOptions] = None
+     _runtime_options: Optional[RuntimeOptions] = None
+     _runtime_conf: Optional[RuntimeConf] = None
      _spark: Optional[SparkSession] = None
      _timeout: Optional[int] = None
-     _options: Optional[Options] = None
      _paths: Optional[Paths] = None
      _table: Optional[Table] = None
-     _root: Optional[Path] = None
 
      _cdc: Optional[Union[NoCDC, SCD1, SCD2]] = None
      _change_data_capture: Optional[AllowedChangeDataCaptures] = None
@@ -83,26 +102,29 @@ class Configurator(ABC):
          if not self._spark:
              spark = build_spark_session(app_name=str(self))
 
-             step_options = self.step_conf.get("spark_options", {})
-             step_sql_options = step_options.get("sql", {})
-             step_conf_options = step_options.get("conf", {})
-             if step_sql_options:
-                 for key, value in step_sql_options.items():
+             # Apply step-level spark options if configured
+             step_spark = self.step_spark_options
+             if step_spark:
+                 sql_options = step_spark.sql or {}
+                 for key, value in sql_options.items():
                      DEFAULT_LOGGER.debug(f"add {key} = {value}", extra={"label": self.step})
                      spark.sql(f"set {key} = {value}")
-             if step_conf_options:
-                 for key, value in step_conf_options.items():
+ 
+                 conf_options = step_spark.conf or {}
+                 for key, value in conf_options.items():
                      DEFAULT_LOGGER.debug(f"add {key} = {value}", extra={"label": self.step})
                      spark.conf.set(f"{key}", f"{value}")
 
-             job_sql_options = self.options.spark.get_dict("sql")
-             job_conf_options = self.options.spark.get_dict("conf")
-             if job_sql_options:
-                 for key, value in job_sql_options.items():
+             # Apply job-level spark options if configured
+             job_spark = self.spark_options
+             if job_spark:
+                 sql_options = job_spark.sql or {}
+                 for key, value in sql_options.items():
                      DEFAULT_LOGGER.debug(f"add {key} = {value}", extra={"label": self})
                      spark.sql(f"set {key} = {value}")
-             if job_conf_options:
-                 for key, value in job_conf_options.items():
+ 
+                 conf_options = job_spark.conf or {}
+                 for key, value in conf_options.items():
                      DEFAULT_LOGGER.debug(f"add {key} = {value}", extra={"label": self})
                      spark.conf.set(f"{key}", f"{value}")
 
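The rewritten property applies step-level options first and job-level options second on the same session, so a job setting overrides a step setting for the same key. A self-contained sketch of that ordering (SparkOptions here is a stand-in with just the two fields used above):

    from dataclasses import dataclass
    from typing import Dict, Optional

    @dataclass
    class SparkOptions:
        sql: Optional[Dict[str, str]] = None   # applied via spark.sql("set k = v")
        conf: Optional[Dict[str, str]] = None  # applied via spark.conf.set(k, v)

    def apply_spark_options(spark, options: Optional[SparkOptions]) -> None:
        if not options:
            return  # nothing configured at this level
        for key, value in (options.sql or {}).items():
            spark.sql(f"set {key} = {value}")
        for key, value in (options.conf or {}).items():
            spark.conf.set(str(key), str(value))

    # step first, then job, so job-level values win on conflicting keys:
    # apply_spark_options(spark, step_spark)
    # apply_spark_options(spark, job_spark)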
@@ -110,11 +132,11 @@ class Configurator(ABC):
          return self._spark
 
      @property
-     def step_conf(self) -> dict:
+     def base_step_conf(self) -> Union[StepBronzeConf, StepSilverConf, StepGoldConf]:
          if not self._step_conf:
-             _conf = [s for s in STEPS if s.get("name") == self.step][0]
+             _conf = [s for s in STEPS if s.name == self.step][0]
              assert _conf is not None
-             self._step_conf = cast(dict[str, str], _conf)
+             self._step_conf = _conf
          return self._step_conf
 
      @property
@@ -122,16 +144,16 @@ class Configurator(ABC):
          return f"{self.step}.{self.topic}_{self.item}"
 
      def _get_timeout(self, what: str) -> int:
-         t = self.step_conf.get("options", {}).get("timeouts", {}).get(what, None)
+         t = getattr(self.step_options.timeouts, what, None)
          if t is None:
-             t = CONF_RUNTIME.get("options", {}).get("timeouts", {}).get(what)
+             t = getattr(self.runtime_options.timeouts, what)
          assert t is not None
          return t
 
      @property
      def timeout(self) -> int:
          if not self._timeout:
-             t = self.options.job.get("timeout")
+             t = self.options.timeout
 
              if t is None:
                  t = self._get_timeout("job")
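
The timeout now resolves through three typed levels: the job option, then the step's timeouts, then the runtime timeouts. A runnable sketch of the same chain (the Timeouts shape is a stand-in):

    from dataclasses import dataclass
    from typing import Optional

    @dataclass
    class Timeouts:
        job: Optional[int] = None

    def resolve_timeout(job_timeout: Optional[int], step: Timeouts, runtime: Timeouts) -> int:
        if job_timeout is not None:
            return job_timeout                # job option wins
        t = getattr(step, "job", None)        # then the step level
        if t is None:
            t = getattr(runtime, "job")       # then the runtime default
        assert t is not None
        return t

    assert resolve_timeout(None, Timeouts(), Timeouts(job=3600)) == 3600
    assert resolve_timeout(60, Timeouts(job=600), Timeouts(job=3600)) == 60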
@@ -158,48 +180,105 @@ class Configurator(ABC):
          assert runtime_root
 
          self._paths = Paths(
-             storage=storage,
-             tmp=storage.joinpath("tmp", self.topic, self.item),
-             checkpoints=storage.joinpath("checkpoints", self.topic, self.item),
-             commits=storage.joinpath("checkpoints", self.topic, self.item, "commits"),
-             schema=storage.joinpath("schema", self.topic, self.item),
-             runtime=runtime_root.joinpath(self.topic, self.item),
+             to_storage=storage,
+             to_tmp=storage.joinpath("tmp", self.topic, self.item),
+             to_checkpoints=storage.joinpath("checkpoints", self.topic, self.item),
+             to_commits=storage.joinpath("checkpoints", self.topic, self.item, "commits"),
+             to_schema=storage.joinpath("schema", self.topic, self.item),
+             to_runtime=runtime_root.joinpath(self.topic, self.item),
          )
 
+         assert self._paths is not None
          return self._paths
 
      @property
-     @lru_cache(maxsize=None)
-     def options(self) -> Options:
-         if not self._options:
-             job = self.conf.options or {}
-             table = self.conf.table_options or {}
-             check = self.conf.check_options or {}
-             spark = self.conf.spark_options or {}
-             invokers = self.conf.invoker_options or {}
-             extenders = self.conf.extender_options or []
- 
-             self._options = Options(
-                 job=FDict(job),
-                 table=FDict(table),
-                 check=FDict(check),
-                 spark=FDict(spark),
-                 invokers=FDict(invokers),
-                 extenders=extenders,
-             )
-         return self._options
+     @abstractmethod
+     def options(self) -> TOptions:
+         """
+         Direct access to typed job options.
+ 
+         Subclasses must implement this property and return their specific typed
+         options instance (e.g. JobBronzeOptions, JobSilverOptions, or JobGoldOptions)
+         corresponding to the job type.
+         """
+         raise NotImplementedError()
+ 
+     @property
+     def runtime_conf(self) -> RuntimeConf:
+         """Direct access to typed runtime conf."""
+         if not self._runtime_conf:
+             from fabricks.context.runtime import CONF_RUNTIME
+ 
+             self._runtime_conf = CONF_RUNTIME
+         return self._runtime_conf
+ 
+     @property
+     @abstractmethod
+     def step_conf(self) -> Union[StepBronzeConf, StepSilverConf, StepGoldConf]:
+         """Direct access to typed step conf from context configuration."""
+         raise NotImplementedError()
+ 
+     @property
+     def step_options(self) -> Union[StepBronzeOptions, StepSilverOptions, StepGoldOptions]:
+         """Direct access to typed step-level options from context configuration."""
+         raise NotImplementedError()
+ 
+     @property
+     def step_table_options(self) -> Optional[StepTableOptions]:
+         """Direct access to typed step-level table options from context configuration."""
+         if self._step_table_options is None:
+             _step = [s for s in STEPS if s.name == self.step][0]
+             assert _step is not None
+             self._step_table_options = _step.table_options
+         return self._step_table_options
+ 
+     @property
+     def runtime_options(self) -> RuntimeOptions:
+         """Direct access to typed runtime options from context configuration."""
+         return self.runtime_conf.options
+ 
+     @property
+     def step_spark_options(self) -> Optional[SparkOptions]:
+         """Direct access to typed step-level spark options from context configuration.
+         Returns None if not configured at step level."""
+         return self.step_conf.spark_options
+ 
+     @property
+     def table_options(self) -> Optional[TableOptions]:
+         """Direct access to typed table options."""
+         return self.conf.table_options
+ 
+     @property
+     def check_options(self) -> Optional[CheckOptions]:
+         """Direct access to typed check options."""
+         return self.conf.check_options
+ 
+     @property
+     def spark_options(self) -> Optional[SparkOptions]:
+         """Direct access to typed spark options."""
+         return self.conf.spark_options
+ 
+     @property
+     def invoker_options(self) -> Optional[InvokerOptions]:
+         """Direct access to typed invoker options."""
+         return self.conf.invoker_options
+ 
+     @property
+     def extender_options(self) -> Optional[List[ExtenderOptions]]:
+         """Direct access to typed extender options."""
+         return self.conf.extender_options
 
      @property
      def change_data_capture(self) -> AllowedChangeDataCaptures:
          if not self._change_data_capture:
-             cdc: AllowedChangeDataCaptures = self.options.job.get("change_data_capture") or "nocdc"
+             cdc: AllowedChangeDataCaptures = self.options.change_data_capture or "nocdc"
              self._change_data_capture = cdc
          return self._change_data_capture
 
      @property
      def cdc(self) -> Union[NoCDC, SCD1, SCD2]:
          if not self._cdc:
-             if self.change_data_capture == "nocdc":
+             if self.change_data_capture in ["nocdc", "none"]:
                  cdc = NoCDC(self.step, self.topic, self.item, spark=self.spark)
              elif self.change_data_capture == "scd1":
                  cdc = SCD1(self.step, self.topic, self.item, spark=self.spark)
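
Since options is now abstract, each concrete job class must expose its own typed options. A sketch of what an implementation might look like; JobGoldOptions and the conf attribute below are stand-ins inferred from the docstring, not the actual 4.0.1 classes:

    from abc import ABC, abstractmethod

    class JobGoldOptions:
        # stand-in for the typed options model named in the docstring
        mode = "update"
        change_data_capture = "scd2"

    class Conf:
        options = JobGoldOptions()

    class ConfiguratorSketch(ABC):
        @property
        @abstractmethod
        def options(self) -> JobGoldOptions:
            raise NotImplementedError()

    class GoldSketch(ConfiguratorSketch):
        conf = Conf()

        @property
        def options(self) -> JobGoldOptions:
            return self.conf.options  # the typed model parsed from the job conf

    assert GoldSketch().options.change_data_capture == "scd2"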
@@ -227,7 +306,7 @@
      @property
      def mode(self) -> AllowedModes:
          if not self._mode:
-             _mode = self.options.job.get("mode")
+             _mode = self.options.mode
              assert _mode is not None
              self._mode = cast(AllowedModes, _mode)
          return self._mode
@@ -288,9 +367,9 @@
              DEFAULT_LOGGER.debug("could not vacuum (memory)", extra={"label": self})
 
          else:
-             job = self.options.table.get("retention_days")
-             step = self.step_conf.get("table_options", {}).get("retention_days", None)
-             runtime = CONF_RUNTIME.get("options", {}).get("retention_days")
+             job = self.table_options.retention_days if self.table_options else None
+             step = self.step_table_options.retention_days if self.step_table_options else None
+             runtime = self.runtime_options.retention_days
 
              if job is not None:
                  retention_days = job
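
The retention lookup keeps the old precedence, now over typed models: job beats step beats runtime. Made explicit:

    def resolve_retention_days(job, step, runtime):
        # job-level value wins, then step, then the runtime default
        if job is not None:
            return job
        if step is not None:
            return step
        return runtime

    assert resolve_retention_days(None, 7, 30) == 7
    assert resolve_retention_days(None, None, 30) == 30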
fabricks/core/jobs/base/generator.py
@@ -6,10 +6,10 @@ from pyspark.sql.functions import lit
 
  from fabricks.cdc import NoCDC
  from fabricks.context.log import DEFAULT_LOGGER
- from fabricks.core.jobs.base._types import JobDependency
  from fabricks.core.jobs.base.configurator import Configurator
  from fabricks.metastore.table import SchemaDiff
  from fabricks.metastore.view import create_or_replace_global_temp_view
+ from fabricks.models import JobDependency
 
 
  class Generator(Configurator):
@@ -31,9 +31,9 @@ class Generator(Configurator):
 
          If the schema folder exists, it will be deleted. The method also calls the `rm_checkpoints` method to remove any checkpoints associated with the generator.
          """
-         if self.paths.schema.exists():
+         if self.paths.to_schema.exists():
              DEFAULT_LOGGER.info("delete schema folder", extra={"label": self})
-             self.paths.schema.rm()
+             self.paths.to_schema.rm()
          self.rm_checkpoints()
 
      def rm_checkpoints(self):
@@ -42,9 +42,9 @@
 
          This method checks if the checkpoints folder exists and deletes it if it does.
          """
-         if self.paths.checkpoints.exists():
+         if self.paths.to_checkpoints.exists():
              DEFAULT_LOGGER.info("delete checkpoints folder", extra={"label": self})
-             self.paths.checkpoints.rm()
+             self.paths.to_checkpoints.rm()
 
      def rm_commit(self, id: Union[str, int]):
          """
@@ -56,7 +56,7 @@
          Returns:
              None
          """
-         path = self.paths.commits.joinpath(str(id))
+         path = self.paths.to_commits.joinpath(str(id))
          if path.exists():
              DEFAULT_LOGGER.warning(f"delete commit {id}", extra={"label": self})
              path.rm()
@@ -91,7 +91,7 @@
          Returns:
              None
          """
-         if self.options.job.get("no_drop"):
+         if self.options.no_drop:
              raise ValueError("no_drop is set, cannot drop the job")
 
          try:
@@ -167,7 +167,7 @@
          ...
 
      def _get_clustering_columns(self, df: DataFrame) -> Optional[List[str]]:
-         columns = self.options.table.get_list("cluster_by")
+         columns = self.table_options.cluster_by or [] if self.table_options else []
          if columns:
              return columns
 
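One subtlety in the new one-liner: the conditional expression binds looser than `or`, so it parses as (cluster_by or []) if table_options else [], which is the intended None-safe read. A quick check:

    from dataclasses import dataclass
    from typing import List, Optional

    @dataclass
    class TableOptions:  # stand-in with the single field used above
        cluster_by: Optional[List[str]] = None

    def clustering_columns(table_options: Optional[TableOptions]) -> List[str]:
        return table_options.cluster_by or [] if table_options else []

    assert clustering_columns(None) == []
    assert clustering_columns(TableOptions()) == []
    assert clustering_columns(TableOptions(cluster_by=["id"])) == ["id"]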
@@ -205,16 +205,16 @@
          identity = False
 
          # first take from job options, then from step options
-         job_powerbi = self.options.table.get_boolean("powerbi", None)
-         step_powerbi = self.step_conf.get("table_options", {}).get("powerbi", None)
+         job_powerbi = self.table_options.powerbi if self.table_options else None
+         step_powerbi = self.step_conf.table_options.powerbi if self.step_conf.table_options else None
          if job_powerbi is not None:
              powerbi = job_powerbi
          elif step_powerbi is not None:
              powerbi = step_powerbi
 
          # first take from job options, then from step options
-         job_masks = self.options.table.get("masks", None)
-         step_masks = self.step_conf.get("table_options", {}).get("masks", None)
+         job_masks = self.table_options.masks if self.table_options else None
+         step_masks = self.step_conf.table_options.masks if self.step_conf.table_options else None
          if job_masks is not None:
              masks = job_masks
          elif step_masks is not None:
@@ -222,7 +222,9 @@
          else:
              masks = None
 
-         maximum_compatibility = self.options.table.get_boolean("maximum_compatibility", False)
+         maximum_compatibility = self.table_options.maximum_compatibility if self.table_options else False
+ 
+         default_properties: dict[str, str | bool | int] = {}
 
          if maximum_compatibility:
              default_properties = {
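
The new default_properties initializer is a real fix, not a cosmetic one: as far as this diff shows, the variable was previously bound only inside the maximum_compatibility branch, yet a later hunk falls back to it with properties = default_properties. A reduced reproduction:

    def build_properties(maximum_compatibility: bool) -> dict:
        default_properties: dict = {}  # 4.0.1: always bound
        if maximum_compatibility:
            # illustrative content; the real mapping is truncated in this diff
            default_properties = {"delta.columnMapping.mode": "name"}
        return default_properties

    # Without the initializer, build_properties(False) would raise
    # UnboundLocalError at the return statement.
    assert build_properties(False) == {}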
@@ -251,11 +253,13 @@
          if "__identity" in df.columns:
              identity = False
          else:
-             identity = self.options.table.get_boolean("identity", False)
+             identity = self.table_options.identity if self.table_options else False
 
          # first take from job options, then from step options
-         liquid_clustering_job = self.options.table.get("liquid_clustering", None)
-         liquid_clustering_step = self.step_conf.get("table_options", {}).get("liquid_clustering", None)
+         liquid_clustering_job = self.table_options.liquid_clustering if self.table_options else None
+         liquid_clustering_step = (
+             self.step_conf.table_options.liquid_clustering if self.step_conf.table_options else None
+         )
          if liquid_clustering_job is not None:
              liquid_clustering = liquid_clustering_job
          elif liquid_clustering_step:
@@ -278,24 +282,24 @@
 
          if liquid_clustering is None:
              cluster_by = None
-             partition_by = self.options.table.get_list("partition_by")
+             partition_by = self.table_options.partition_by or [] if self.table_options else []
              if partition_by:
                  partitioning = True
 
          properties = None
          if not powerbi:
              # first take from job options, then from step options
-             if self.options.table.get_dict("properties"):
-                 properties = self.options.table.get_dict("properties")
-             elif self.step_conf.get("table_options", {}).get("properties", {}):
-                 properties = self.step_conf.get("table_options", {}).get("properties", {})
+             if self.table_options and self.table_options.properties:
+                 properties = self.table_options.properties
+             elif self.step_conf.table_options and self.step_conf.table_options.properties:
+                 properties = self.step_conf.table_options.properties
 
          if properties is None:
              properties = default_properties
 
-         primary_key = self.options.table.get_dict("primary_key")
-         foreign_keys = self.options.table.get_dict("foreign_keys")
-         comments = self.options.table.get_dict("comments")
+         primary_key = self.table_options.primary_key or {} if self.table_options else {}
+         foreign_keys = self.table_options.foreign_keys or {} if self.table_options else {}
+         comments = self.table_options.comments or {} if self.table_options else {}
 
          # if dataframe, reference is passed (BUG)
          name = f"{self.step}_{self.topic}_{self.item}__init"
@@ -332,7 +336,7 @@
              dummy_df = dummy_df.select("__metadata")
 
              df = df.unionByName(dummy_df, allowMissingColumns=True)
-             path = self.paths.checkpoints.append("__init")
+             path = self.paths.to_checkpoints.append("__init")
              if path.exists():
                  path.rm()
 
@@ -347,12 +351,12 @@
          else:
              _create_table(df)
 
-         constraints = self.options.table.get_dict("constraints")
+         constraints = self.table_options.constraints or {} if self.table_options else {}
          if constraints:
              for key, value in constraints.items():
-                 self.table.add_constraint(name=key, expr=value)
+                 self.table.add_constraint(name=key, expr=str(value))
 
-         comment = self.options.table.get("comment")
+         comment = self.table_options.comment if self.table_options else None
          if comment:
              self.table.add_table_comment(comment=comment)
 
@@ -382,7 +386,7 @@
          df = self.base_transform(df)
 
          if self.stream:
-             path = self.paths.checkpoints.append("__schema")
+             path = self.paths.to_checkpoints.append("__schema")
              query = (
                  df.writeStream.foreachBatch(_update_schema)
                  .option("checkpointLocation", path.string)
@@ -415,15 +419,15 @@
          self.table.drop_comments()
 
          if table:
-             comment = self.options.table.get("comment")
+             comment = self.table_options.comment if self.table_options else None
              if comment:
                  self.table.add_table_comment(comment=comment)
 
          if columns:
-             comments = self.options.table.get_dict("comments")
+             comments = self.table_options.comments or {} if self.table_options else {}
              if comments:
                  for col, comment in comments.items():
-                     self.table.add_column_comment(column=col, comment=comment)
+                     self.table.add_column_comment(column=col, comment=str(comment))
 
      def get_differences_with_deltatable(self, df: Optional[DataFrame] = None):
          if df is None:
@@ -456,8 +460,8 @@
          enable = False
 
          # first take from job options, then from step options
-         enable_job = self.options.table.get_boolean("liquid_clustering", None)
-         enable_step = self.step_conf.get("table_options", {}).get("liquid_clustering", None)
+         enable_job = self.table_options.liquid_clustering if self.table_options else None
+         enable_step = self.step_conf.table_options.liquid_clustering if self.step_conf.table_options else None
          if enable_job is not None:
              enable = enable_job
          elif enable_step: