fabricks-3.0.19-py3-none-any.whl → fabricks-4.0.1-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in those registries.
Files changed (95)
  1. fabricks/api/context.py +15 -3
  2. fabricks/api/notebooks/schedule.py +2 -3
  3. fabricks/api/parsers.py +2 -1
  4. fabricks/api/utils.py +3 -1
  5. fabricks/cdc/__init__.py +1 -2
  6. fabricks/cdc/base/__init__.py +1 -2
  7. fabricks/cdc/base/_types.py +5 -3
  8. fabricks/cdc/base/configurator.py +5 -0
  9. fabricks/cdc/base/generator.py +7 -3
  10. fabricks/cdc/base/merger.py +2 -0
  11. fabricks/cdc/base/processor.py +15 -0
  12. fabricks/cdc/templates/README.md +490 -0
  13. fabricks/cdc/templates/ctes/base.sql.jinja +1 -0
  14. fabricks/cdc/templates/ctes/current.sql.jinja +4 -0
  15. fabricks/cdc/templates/merges/scd1.sql.jinja +6 -0
  16. fabricks/cdc/templates/merges/scd2.sql.jinja +6 -0
  17. fabricks/cdc/templates/queries/context.sql.jinja +104 -96
  18. fabricks/cdc/templates/query.sql.jinja +1 -1
  19. fabricks/context/__init__.py +13 -1
  20. fabricks/context/config.py +13 -122
  21. fabricks/context/log.py +92 -1
  22. fabricks/context/runtime.py +35 -69
  23. fabricks/context/spark_session.py +4 -4
  24. fabricks/context/utils.py +26 -39
  25. fabricks/core/__init__.py +2 -2
  26. fabricks/core/dags/base.py +5 -5
  27. fabricks/core/dags/processor.py +2 -3
  28. fabricks/core/extenders.py +1 -1
  29. fabricks/core/job_schema.py +26 -16
  30. fabricks/core/jobs/__init__.py +1 -7
  31. fabricks/core/jobs/base/README.md +1545 -0
  32. fabricks/core/jobs/base/__init__.py +1 -8
  33. fabricks/core/jobs/base/checker.py +7 -7
  34. fabricks/core/jobs/base/configurator.py +142 -63
  35. fabricks/core/jobs/base/generator.py +38 -34
  36. fabricks/core/jobs/base/invoker.py +48 -63
  37. fabricks/core/jobs/base/processor.py +13 -28
  38. fabricks/core/jobs/bronze.py +88 -38
  39. fabricks/core/jobs/get_job.py +3 -6
  40. fabricks/core/jobs/get_job_conf.py +19 -68
  41. fabricks/core/jobs/get_jobs.py +10 -11
  42. fabricks/core/jobs/get_schedules.py +3 -17
  43. fabricks/core/jobs/gold.py +89 -47
  44. fabricks/core/jobs/silver.py +42 -22
  45. fabricks/core/masks.py +11 -8
  46. fabricks/core/parsers/__init__.py +0 -2
  47. fabricks/core/parsers/base.py +10 -10
  48. fabricks/core/parsers/decorator.py +1 -1
  49. fabricks/core/parsers/get_parser.py +4 -5
  50. fabricks/core/schedules/process.py +1 -4
  51. fabricks/core/steps/base.py +27 -17
  52. fabricks/core/steps/get_step.py +2 -4
  53. fabricks/core/steps/get_step_conf.py +3 -7
  54. fabricks/core/udfs.py +7 -7
  55. fabricks/core/views.py +2 -2
  56. fabricks/deploy/__init__.py +27 -16
  57. fabricks/deploy/masks.py +1 -1
  58. fabricks/deploy/notebooks.py +19 -16
  59. fabricks/deploy/schedules.py +1 -1
  60. fabricks/deploy/tables.py +66 -49
  61. fabricks/deploy/udfs.py +2 -2
  62. fabricks/deploy/views.py +15 -16
  63. fabricks/metastore/database.py +3 -3
  64. fabricks/metastore/table.py +103 -68
  65. fabricks/models/__init__.py +125 -0
  66. fabricks/models/common.py +79 -0
  67. fabricks/models/config.py +225 -0
  68. fabricks/models/dependency.py +50 -0
  69. fabricks/models/job.py +157 -0
  70. fabricks/models/path.py +17 -0
  71. fabricks/models/runtime.py +182 -0
  72. fabricks/models/schedule.py +21 -0
  73. fabricks/models/step.py +103 -0
  74. fabricks/models/table.py +77 -0
  75. fabricks/{core/jobs/get_job_id.py → models/utils.py} +2 -0
  76. fabricks/utils/helpers.py +6 -5
  77. fabricks/utils/log.py +25 -6
  78. fabricks/utils/path.py +265 -108
  79. fabricks/utils/pip.py +7 -7
  80. fabricks/utils/read/read.py +23 -22
  81. fabricks/utils/read/read_yaml.py +2 -2
  82. fabricks/utils/write/delta.py +4 -4
  83. fabricks/utils/write/stream.py +2 -2
  84. {fabricks-3.0.19.dist-info → fabricks-4.0.1.dist-info}/METADATA +9 -4
  85. {fabricks-3.0.19.dist-info → fabricks-4.0.1.dist-info}/RECORD +86 -83
  86. fabricks/context/_types.py +0 -139
  87. fabricks/context/helpers.py +0 -63
  88. fabricks/core/jobs/base/_types.py +0 -284
  89. fabricks/core/parsers/_types.py +0 -6
  90. fabricks/utils/fdict.py +0 -240
  91. fabricks/utils/pydantic.py +0 -94
  92. fabricks/utils/schema/__init__.py +0 -7
  93. fabricks/utils/schema/get_json_schema_for_type.py +0 -161
  94. fabricks/utils/schema/get_schema_for_type.py +0 -99
  95. {fabricks-3.0.19.dist-info → fabricks-4.0.1.dist-info}/WHEEL +0 -0
fabricks/core/jobs/base/__init__.py
@@ -1,10 +1,3 @@
- from fabricks.core.jobs.base._types import Bronzes, Golds, Silvers, Steps
  from fabricks.core.jobs.base.job import BaseJob
 
- __all__ = [
-     "BaseJob",
-     "Bronzes",
-     "Golds",
-     "Silvers",
-     "Steps",
- ]
+ __all__ = ["BaseJob"]
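
Note that fabricks/core/jobs/base/_types.py is gone in 4.0.1 (see file 88 above), so the step aliases are no longer re-exported here; only BaseJob survives. A hedged sketch of the downstream impact, with the assumed new home of the aliases left commented out:

    # Still valid in 4.0.1:
    from fabricks.core.jobs.base import BaseJob

    # Bronzes, Golds, Silvers and Steps were removed along with _types.py.
    # Assumption: they now live in the new fabricks.models package added in
    # this release -- verify against fabricks/models/__init__.py before use.
    # from fabricks.models import Bronzes, Golds, Silvers, Steps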
fabricks/core/jobs/base/checker.py
@@ -19,10 +19,10 @@ class Checker(Generator):
          self._check("post_run")
 
      def _check(self, position: Literal["pre_run", "post_run"]):
-         if self.options.check.get(position):
+         if self.check_options and getattr(self.check_options, position):
              DEFAULT_LOGGER.debug(f"check {position}", extra={"label": self})
 
-             p = self.paths.runtime.append(f".{position}.sql")
+             p = self.paths.to_runtime.append(f".{position}.sql")
              assert p.exists(), f"{position} check not found ({p})"
 
              df = self.spark.sql(p.get_sql())
@@ -54,9 +54,9 @@ class Checker(Generator):
              raise PostRunCheckWarning(row["__message"], dataframe=df)
 
      def check_post_run_extra(self):
-         min_rows = self.options.check.get("min_rows")
-         max_rows = self.options.check.get("max_rows")
-         count_must_equal = self.options.check.get("count_must_equal")
+         min_rows = self.check_options.min_rows if self.check_options else None
+         max_rows = self.check_options.max_rows if self.check_options else None
+         count_must_equal = self.check_options.count_must_equal if self.check_options else None
 
          if min_rows or max_rows or count_must_equal:
              df = self.spark.sql(f"select count(*) from {self}")
@@ -121,10 +121,10 @@ class Checker(Generator):
          self._check_duplicate_in_column("__identity")
 
      def check_skip_run(self):
-         if self.options.check.get("skip"):
+         if self.check_options and self.check_options.skip:
              DEFAULT_LOGGER.debug("check if run should be skipped", extra={"label": self})
 
-             p = self.paths.runtime.append(".skip.sql")
+             p = self.paths.to_runtime.append(".skip.sql")
              assert p.exists(), "skip check not found"
 
              df = self.spark.sql(p.get_sql())
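
The three hunks above all trade dict-style FDict lookups for attribute access on an optional typed model, which is why every read now guards on self.check_options first. A minimal sketch of the pattern, using a dataclass as a stand-in for the real CheckOptions model in fabricks.models (field types are assumptions):

    from dataclasses import dataclass
    from typing import Optional

    @dataclass
    class CheckOptions:
        # field names appear in this diff; the types are guesses
        pre_run: Optional[bool] = None
        post_run: Optional[bool] = None
        skip: Optional[bool] = None
        min_rows: Optional[int] = None
        max_rows: Optional[int] = None
        count_must_equal: Optional[int] = None

    # 3.x read an untyped dict wrapper: self.options.check.get("min_rows")
    # 4.x: the whole options object may be None, hence the guard:
    check_options: Optional[CheckOptions] = None
    min_rows = check_options.min_rows if check_options else None
    assert min_rows is None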
fabricks/core/jobs/base/configurator.py
@@ -1,41 +1,58 @@
  from abc import ABC, abstractmethod
- from functools import lru_cache
- from typing import Optional, Union, cast
+ from typing import List, Optional, Union, cast
 
  from pyspark.sql import DataFrame, SparkSession
  from pyspark.sql.types import Row
  from typing_extensions import deprecated
 
- from fabricks.cdc import SCD1, SCD2, AllowedChangeDataCaptures, NoCDC
- from fabricks.context import CONF_RUNTIME, PATHS_RUNTIME, PATHS_STORAGE, STEPS
+ from fabricks.cdc import SCD1, SCD2, NoCDC
+ from fabricks.context import PATHS_RUNTIME, PATHS_STORAGE, STEPS
  from fabricks.context.log import DEFAULT_LOGGER
  from fabricks.context.spark_session import build_spark_session
- from fabricks.core.jobs.base._types import AllowedModes, Options, Paths, TStep
  from fabricks.core.jobs.get_job_conf import get_job_conf
- from fabricks.core.jobs.get_job_id import get_job_id
  from fabricks.metastore.table import Table
- from fabricks.utils.fdict import FDict
- from fabricks.utils.path import Path
+ from fabricks.models import (
+     AllowedChangeDataCaptures,
+     AllowedModes,
+     CheckOptions,
+     ExtenderOptions,
+     InvokerOptions,
+     Paths,
+     RuntimeOptions,
+     SparkOptions,
+     StepBronzeConf,
+     StepBronzeOptions,
+     StepGoldConf,
+     StepGoldOptions,
+     StepSilverConf,
+     StepSilverOptions,
+     StepTableOptions,
+     TableOptions,
+     TOptions,
+     get_job_id,
+ )
+ from fabricks.models.runtime import RuntimeConf
 
 
  class Configurator(ABC):
      def __init__(
          self,
          expand: str,
-         step: TStep,
+         step: str,
          topic: Optional[str] = None,
          item: Optional[str] = None,
          job_id: Optional[str] = None,
          conf: Optional[Union[dict, Row]] = None,
      ):
          self.expand = expand
-         self.step: TStep = step
+         self.step = step
 
          if job_id is not None:
              self.job_id = job_id
              self.conf = get_job_conf(step=self.step, job_id=self.job_id, row=conf)
              self.topic = self.conf.topic
              self.item = self.conf.item
+ 
          else:
              assert topic
              assert item
@@ -44,13 +61,15 @@ class Configurator(ABC):
              self.conf = get_job_conf(step=self.step, topic=self.topic, item=self.item, row=conf)
              self.job_id = get_job_id(step=self.step, topic=self.topic, item=self.item)
 
-     _step_conf: Optional[dict[str, str]] = None
+     _step_conf: Optional[Union[StepBronzeConf, StepSilverConf, StepGoldConf]] = None
+     _step_options: Optional[Union[StepBronzeOptions, StepSilverOptions, StepGoldOptions]] = None
+     _step_table_options: Optional[StepTableOptions] = None
+     _runtime_options: Optional[RuntimeOptions] = None
+     _runtime_conf: Optional[RuntimeConf] = None
      _spark: Optional[SparkSession] = None
      _timeout: Optional[int] = None
-     _options: Optional[Options] = None
      _paths: Optional[Paths] = None
      _table: Optional[Table] = None
-     _root: Optional[Path] = None
 
      _cdc: Optional[Union[NoCDC, SCD1, SCD2]] = None
      _change_data_capture: Optional[AllowedChangeDataCaptures] = None
@@ -83,26 +102,29 @@ class Configurator(ABC):
          if not self._spark:
              spark = build_spark_session(app_name=str(self))
 
-             step_options = self.step_conf.get("spark_options", {})
-             step_sql_options = step_options.get("sql", {})
-             step_conf_options = step_options.get("conf", {})
-             if step_sql_options:
-                 for key, value in step_sql_options.items():
+             # Apply step-level spark options if configured
+             step_spark = self.step_spark_options
+             if step_spark:
+                 sql_options = step_spark.sql or {}
+                 for key, value in sql_options.items():
                      DEFAULT_LOGGER.debug(f"add {key} = {value}", extra={"label": self.step})
                      spark.sql(f"set {key} = {value}")
-             if step_conf_options:
-                 for key, value in step_conf_options.items():
+ 
+                 conf_options = step_spark.conf or {}
+                 for key, value in conf_options.items():
                      DEFAULT_LOGGER.debug(f"add {key} = {value}", extra={"label": self.step})
                      spark.conf.set(f"{key}", f"{value}")
 
-             job_sql_options = self.options.spark.get_dict("sql")
-             job_conf_options = self.options.spark.get_dict("conf")
-             if job_sql_options:
-                 for key, value in job_sql_options.items():
+             # Apply job-level spark options if configured
+             job_spark = self.spark_options
+             if job_spark:
+                 sql_options = job_spark.sql or {}
+                 for key, value in sql_options.items():
                      DEFAULT_LOGGER.debug(f"add {key} = {value}", extra={"label": self})
                      spark.sql(f"set {key} = {value}")
-             if job_conf_options:
-                 for key, value in job_conf_options.items():
+ 
+                 conf_options = job_spark.conf or {}
+                 for key, value in conf_options.items():
                      DEFAULT_LOGGER.debug(f"add {key} = {value}", extra={"label": self})
                      spark.conf.set(f"{key}", f"{value}")
 
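The rewritten property applies step-level options first and job-level options second on the same session, so a job setting overrides a step setting for the same key. A self-contained sketch of that ordering (SparkOptions here is a stand-in with just the two fields used above):

    from dataclasses import dataclass
    from typing import Dict, Optional

    @dataclass
    class SparkOptions:
        sql: Optional[Dict[str, str]] = None   # applied via spark.sql("set k = v")
        conf: Optional[Dict[str, str]] = None  # applied via spark.conf.set(k, v)

    def apply_spark_options(spark, options: Optional[SparkOptions]) -> None:
        if not options:
            return  # nothing configured at this level
        for key, value in (options.sql or {}).items():
            spark.sql(f"set {key} = {value}")
        for key, value in (options.conf or {}).items():
            spark.conf.set(str(key), str(value))

    # step first, then job, so job-level values win on conflicting keys:
    # apply_spark_options(spark, step_spark)
    # apply_spark_options(spark, job_spark)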
@@ -110,11 +132,11 @@ class Configurator(ABC):
          return self._spark
 
      @property
-     def step_conf(self) -> dict:
+     def base_step_conf(self) -> Union[StepBronzeConf, StepSilverConf, StepGoldConf]:
          if not self._step_conf:
-             _conf = [s for s in STEPS if s.get("name") == self.step][0]
+             _conf = [s for s in STEPS if s.name == self.step][0]
              assert _conf is not None
-             self._step_conf = cast(dict[str, str], _conf)
+             self._step_conf = _conf
          return self._step_conf
 
      @property
@@ -122,16 +144,16 @@ class Configurator(ABC):
          return f"{self.step}.{self.topic}_{self.item}"
 
      def _get_timeout(self, what: str) -> int:
-         t = self.step_conf.get("options", {}).get("timeouts", {}).get(what, None)
+         t = getattr(self.step_options.timeouts, what, None)
          if t is None:
-             t = CONF_RUNTIME.get("options", {}).get("timeouts", {}).get(what)
+             t = getattr(self.runtime_options.timeouts, what)
          assert t is not None
          return t
 
      @property
      def timeout(self) -> int:
          if not self._timeout:
-             t = self.options.job.get("timeout")
+             t = self.options.timeout
 
              if t is None:
                  t = self._get_timeout("job")
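
The timeout now resolves through three typed levels: the job option, then the step's timeouts, then the runtime timeouts. A runnable sketch of the same chain (the Timeouts shape is a stand-in):

    from dataclasses import dataclass
    from typing import Optional

    @dataclass
    class Timeouts:
        job: Optional[int] = None

    def resolve_timeout(job_timeout: Optional[int], step: Timeouts, runtime: Timeouts) -> int:
        if job_timeout is not None:
            return job_timeout                # job option wins
        t = getattr(step, "job", None)        # then the step level
        if t is None:
            t = getattr(runtime, "job")       # then the runtime default
        assert t is not None
        return t

    assert resolve_timeout(None, Timeouts(), Timeouts(job=3600)) == 3600
    assert resolve_timeout(60, Timeouts(job=600), Timeouts(job=3600)) == 60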
@@ -158,48 +180,105 @@ class Configurator(ABC):
          assert runtime_root
 
          self._paths = Paths(
-             storage=storage,
-             tmp=storage.joinpath("tmp", self.topic, self.item),
-             checkpoints=storage.joinpath("checkpoints", self.topic, self.item),
-             commits=storage.joinpath("checkpoints", self.topic, self.item, "commits"),
-             schema=storage.joinpath("schema", self.topic, self.item),
-             runtime=runtime_root.joinpath(self.topic, self.item),
+             to_storage=storage,
+             to_tmp=storage.joinpath("tmp", self.topic, self.item),
+             to_checkpoints=storage.joinpath("checkpoints", self.topic, self.item),
+             to_commits=storage.joinpath("checkpoints", self.topic, self.item, "commits"),
+             to_schema=storage.joinpath("schema", self.topic, self.item),
+             to_runtime=runtime_root.joinpath(self.topic, self.item),
          )
 
+         assert self._paths is not None
          return self._paths
 
      @property
-     @lru_cache(maxsize=None)
-     def options(self) -> Options:
-         if not self._options:
-             job = self.conf.options or {}
-             table = self.conf.table_options or {}
-             check = self.conf.check_options or {}
-             spark = self.conf.spark_options or {}
-             invokers = self.conf.invoker_options or {}
-             extenders = self.conf.extender_options or []
- 
-             self._options = Options(
-                 job=FDict(job),
-                 table=FDict(table),
-                 check=FDict(check),
-                 spark=FDict(spark),
-                 invokers=FDict(invokers),
-                 extenders=extenders,
-             )
-         return self._options
+     @abstractmethod
+     def options(self) -> TOptions:
+         """
+         Direct access to typed job options.
+ 
+         Subclasses must implement this property and return their specific typed
+         options instance (e.g. JobBronzeOptions, JobSilverOptions, or JobGoldOptions)
+         corresponding to the job type.
+         """
+         raise NotImplementedError()
+ 
+     @property
+     def runtime_conf(self) -> RuntimeConf:
+         """Direct access to typed runtime conf."""
+         if not self._runtime_conf:
+             from fabricks.context.runtime import CONF_RUNTIME
+ 
+             self._runtime_conf = CONF_RUNTIME
+         return self._runtime_conf
+ 
+     @property
+     @abstractmethod
+     def step_conf(self) -> Union[StepBronzeConf, StepSilverConf, StepGoldConf]:
+         """Direct access to typed step conf from context configuration."""
+         raise NotImplementedError()
+ 
+     @property
+     def step_options(self) -> Union[StepBronzeOptions, StepSilverOptions, StepGoldOptions]:
+         """Direct access to typed step-level options from context configuration."""
+         raise NotImplementedError()
+ 
+     @property
+     def step_table_options(self) -> Optional[StepTableOptions]:
+         """Direct access to typed step-level table options from context configuration."""
+         if self._step_table_options is None:
+             _step = [s for s in STEPS if s.name == self.step][0]
+             assert _step is not None
+             self._step_table_options = _step.table_options
+         return self._step_table_options
+ 
+     @property
+     def runtime_options(self) -> RuntimeOptions:
+         """Direct access to typed runtime options from context configuration."""
+         return self.runtime_conf.options
+ 
+     @property
+     def step_spark_options(self) -> Optional[SparkOptions]:
+         """Direct access to typed step-level spark options from context configuration.
+         Returns None if not configured at step level."""
+         return self.step_conf.spark_options
+ 
+     @property
+     def table_options(self) -> Optional[TableOptions]:
+         """Direct access to typed table options."""
+         return self.conf.table_options
+ 
+     @property
+     def check_options(self) -> Optional[CheckOptions]:
+         """Direct access to typed check options."""
+         return self.conf.check_options
+ 
+     @property
+     def spark_options(self) -> Optional[SparkOptions]:
+         """Direct access to typed spark options."""
+         return self.conf.spark_options
+ 
+     @property
+     def invoker_options(self) -> Optional[InvokerOptions]:
+         """Direct access to typed invoker options."""
+         return self.conf.invoker_options
+ 
+     @property
+     def extender_options(self) -> Optional[List[ExtenderOptions]]:
+         """Direct access to typed extender options."""
+         return self.conf.extender_options
 
      @property
      def change_data_capture(self) -> AllowedChangeDataCaptures:
          if not self._change_data_capture:
-             cdc: AllowedChangeDataCaptures = self.options.job.get("change_data_capture") or "nocdc"
+             cdc: AllowedChangeDataCaptures = self.options.change_data_capture or "nocdc"
              self._change_data_capture = cdc
          return self._change_data_capture
 
      @property
      def cdc(self) -> Union[NoCDC, SCD1, SCD2]:
          if not self._cdc:
-             if self.change_data_capture == "nocdc":
+             if self.change_data_capture in ["nocdc", "none"]:
                  cdc = NoCDC(self.step, self.topic, self.item, spark=self.spark)
              elif self.change_data_capture == "scd1":
                  cdc = SCD1(self.step, self.topic, self.item, spark=self.spark)
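
Since options is now abstract, each concrete job class must expose its own typed options. A sketch of what an implementation might look like; JobGoldOptions and the conf attribute below are stand-ins inferred from the docstring, not the actual 4.0.1 classes:

    from abc import ABC, abstractmethod

    class JobGoldOptions:
        # stand-in for the typed options model named in the docstring
        mode = "update"
        change_data_capture = "scd2"

    class Conf:
        options = JobGoldOptions()

    class ConfiguratorSketch(ABC):
        @property
        @abstractmethod
        def options(self) -> JobGoldOptions:
            raise NotImplementedError()

    class GoldSketch(ConfiguratorSketch):
        conf = Conf()

        @property
        def options(self) -> JobGoldOptions:
            return self.conf.options  # the typed model parsed from the job conf

    assert GoldSketch().options.change_data_capture == "scd2"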
@@ -227,7 +306,7 @@
      @property
      def mode(self) -> AllowedModes:
          if not self._mode:
-             _mode = self.options.job.get("mode")
+             _mode = self.options.mode
              assert _mode is not None
              self._mode = cast(AllowedModes, _mode)
          return self._mode
@@ -288,9 +367,9 @@
              DEFAULT_LOGGER.debug("could not vacuum (memory)", extra={"label": self})
 
          else:
-             job = self.options.table.get("retention_days")
-             step = self.step_conf.get("table_options", {}).get("retention_days", None)
-             runtime = CONF_RUNTIME.get("options", {}).get("retention_days")
+             job = self.table_options.retention_days if self.table_options else None
+             step = self.step_table_options.retention_days if self.step_table_options else None
+             runtime = self.runtime_options.retention_days
 
              if job is not None:
                  retention_days = job
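
The retention lookup keeps the old precedence, now over typed models: job beats step beats runtime. Made explicit:

    def resolve_retention_days(job, step, runtime):
        # job-level value wins, then step, then the runtime default
        if job is not None:
            return job
        if step is not None:
            return step
        return runtime

    assert resolve_retention_days(None, 7, 30) == 7
    assert resolve_retention_days(None, None, 30) == 30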
fabricks/core/jobs/base/generator.py
@@ -6,10 +6,10 @@ from pyspark.sql.functions import lit
 
  from fabricks.cdc import NoCDC
  from fabricks.context.log import DEFAULT_LOGGER
- from fabricks.core.jobs.base._types import JobDependency
  from fabricks.core.jobs.base.configurator import Configurator
  from fabricks.metastore.table import SchemaDiff
  from fabricks.metastore.view import create_or_replace_global_temp_view
+ from fabricks.models import JobDependency
 
 
  class Generator(Configurator):
@@ -31,9 +31,9 @@ class Generator(Configurator):
 
          If the schema folder exists, it will be deleted. The method also calls the `rm_checkpoints` method to remove any checkpoints associated with the generator.
          """
-         if self.paths.schema.exists():
+         if self.paths.to_schema.exists():
              DEFAULT_LOGGER.info("delete schema folder", extra={"label": self})
-             self.paths.schema.rm()
+             self.paths.to_schema.rm()
          self.rm_checkpoints()
 
      def rm_checkpoints(self):
@@ -42,9 +42,9 @@
 
          This method checks if the checkpoints folder exists and deletes it if it does.
          """
-         if self.paths.checkpoints.exists():
+         if self.paths.to_checkpoints.exists():
              DEFAULT_LOGGER.info("delete checkpoints folder", extra={"label": self})
-             self.paths.checkpoints.rm()
+             self.paths.to_checkpoints.rm()
 
      def rm_commit(self, id: Union[str, int]):
          """
@@ -56,7 +56,7 @@
          Returns:
              None
          """
-         path = self.paths.commits.joinpath(str(id))
+         path = self.paths.to_commits.joinpath(str(id))
          if path.exists():
              DEFAULT_LOGGER.warning(f"delete commit {id}", extra={"label": self})
              path.rm()
@@ -91,7 +91,7 @@
          Returns:
              None
          """
-         if self.options.job.get("no_drop"):
+         if self.options.no_drop:
              raise ValueError("no_drop is set, cannot drop the job")
 
          try:
@@ -167,7 +167,7 @@
          ...
 
      def _get_clustering_columns(self, df: DataFrame) -> Optional[List[str]]:
-         columns = self.options.table.get_list("cluster_by")
+         columns = self.table_options.cluster_by or [] if self.table_options else []
          if columns:
              return columns
 
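One subtlety in the new one-liner: the conditional expression binds looser than `or`, so it parses as (cluster_by or []) if table_options else [], which is the intended None-safe read. A quick check:

    from dataclasses import dataclass
    from typing import List, Optional

    @dataclass
    class TableOptions:  # stand-in with the single field used above
        cluster_by: Optional[List[str]] = None

    def clustering_columns(table_options: Optional[TableOptions]) -> List[str]:
        return table_options.cluster_by or [] if table_options else []

    assert clustering_columns(None) == []
    assert clustering_columns(TableOptions()) == []
    assert clustering_columns(TableOptions(cluster_by=["id"])) == ["id"]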
@@ -205,16 +205,16 @@
          identity = False
 
          # first take from job options, then from step options
-         job_powerbi = self.options.table.get_boolean("powerbi", None)
-         step_powerbi = self.step_conf.get("table_options", {}).get("powerbi", None)
+         job_powerbi = self.table_options.powerbi if self.table_options else None
+         step_powerbi = self.step_conf.table_options.powerbi if self.step_conf.table_options else None
          if job_powerbi is not None:
              powerbi = job_powerbi
          elif step_powerbi is not None:
              powerbi = step_powerbi
 
          # first take from job options, then from step options
-         job_masks = self.options.table.get("masks", None)
-         step_masks = self.step_conf.get("table_options", {}).get("masks", None)
+         job_masks = self.table_options.masks if self.table_options else None
+         step_masks = self.step_conf.table_options.masks if self.step_conf.table_options else None
          if job_masks is not None:
              masks = job_masks
          elif step_masks is not None:
@@ -222,7 +222,9 @@
          else:
              masks = None
 
-         maximum_compatibility = self.options.table.get_boolean("maximum_compatibility", False)
+         maximum_compatibility = self.table_options.maximum_compatibility if self.table_options else False
+ 
+         default_properties: dict[str, str | bool | int] = {}
 
          if maximum_compatibility:
              default_properties = {
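
The new default_properties initializer is a real fix, not a cosmetic one: as far as this diff shows, the variable was previously bound only inside the maximum_compatibility branch, yet a later hunk falls back to it with properties = default_properties. A reduced reproduction:

    def build_properties(maximum_compatibility: bool) -> dict:
        default_properties: dict = {}  # 4.0.1: always bound
        if maximum_compatibility:
            # illustrative content; the real mapping is truncated in this diff
            default_properties = {"delta.columnMapping.mode": "name"}
        return default_properties

    # Without the initializer, build_properties(False) would raise
    # UnboundLocalError at the return statement.
    assert build_properties(False) == {}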
@@ -251,11 +253,13 @@
          if "__identity" in df.columns:
              identity = False
          else:
-             identity = self.options.table.get_boolean("identity", False)
+             identity = self.table_options.identity if self.table_options else False
 
          # first take from job options, then from step options
-         liquid_clustering_job = self.options.table.get("liquid_clustering", None)
-         liquid_clustering_step = self.step_conf.get("table_options", {}).get("liquid_clustering", None)
+         liquid_clustering_job = self.table_options.liquid_clustering if self.table_options else None
+         liquid_clustering_step = (
+             self.step_conf.table_options.liquid_clustering if self.step_conf.table_options else None
+         )
          if liquid_clustering_job is not None:
              liquid_clustering = liquid_clustering_job
          elif liquid_clustering_step:
@@ -278,24 +282,24 @@
 
          if liquid_clustering is None:
              cluster_by = None
-             partition_by = self.options.table.get_list("partition_by")
+             partition_by = self.table_options.partition_by or [] if self.table_options else []
              if partition_by:
                  partitioning = True
 
          properties = None
          if not powerbi:
              # first take from job options, then from step options
-             if self.options.table.get_dict("properties"):
-                 properties = self.options.table.get_dict("properties")
-             elif self.step_conf.get("table_options", {}).get("properties", {}):
-                 properties = self.step_conf.get("table_options", {}).get("properties", {})
+             if self.table_options and self.table_options.properties:
+                 properties = self.table_options.properties
+             elif self.step_conf.table_options and self.step_conf.table_options.properties:
+                 properties = self.step_conf.table_options.properties
 
          if properties is None:
              properties = default_properties
 
-         primary_key = self.options.table.get_dict("primary_key")
-         foreign_keys = self.options.table.get_dict("foreign_keys")
-         comments = self.options.table.get_dict("comments")
+         primary_key = self.table_options.primary_key or {} if self.table_options else {}
+         foreign_keys = self.table_options.foreign_keys or {} if self.table_options else {}
+         comments = self.table_options.comments or {} if self.table_options else {}
 
          # if dataframe, reference is passed (BUG)
          name = f"{self.step}_{self.topic}_{self.item}__init"
@@ -332,7 +336,7 @@
              dummy_df = dummy_df.select("__metadata")
 
              df = df.unionByName(dummy_df, allowMissingColumns=True)
-             path = self.paths.checkpoints.append("__init")
+             path = self.paths.to_checkpoints.append("__init")
              if path.exists():
                  path.rm()
 
@@ -347,12 +351,12 @@
          else:
              _create_table(df)
 
-         constraints = self.options.table.get_dict("constraints")
+         constraints = self.table_options.constraints or {} if self.table_options else {}
          if constraints:
              for key, value in constraints.items():
-                 self.table.add_constraint(name=key, expr=value)
+                 self.table.add_constraint(name=key, expr=str(value))
 
-         comment = self.options.table.get("comment")
+         comment = self.table_options.comment if self.table_options else None
          if comment:
              self.table.add_table_comment(comment=comment)
 
@@ -382,7 +386,7 @@
          df = self.base_transform(df)
 
          if self.stream:
-             path = self.paths.checkpoints.append("__schema")
+             path = self.paths.to_checkpoints.append("__schema")
              query = (
                  df.writeStream.foreachBatch(_update_schema)
                  .option("checkpointLocation", path.string)
@@ -415,15 +419,15 @@
          self.table.drop_comments()
 
          if table:
-             comment = self.options.table.get("comment")
+             comment = self.table_options.comment if self.table_options else None
              if comment:
                  self.table.add_table_comment(comment=comment)
 
          if columns:
-             comments = self.options.table.get_dict("comments")
+             comments = self.table_options.comments or {} if self.table_options else {}
              if comments:
                  for col, comment in comments.items():
-                     self.table.add_column_comment(column=col, comment=comment)
+                     self.table.add_column_comment(column=col, comment=str(comment))
 
      def get_differences_with_deltatable(self, df: Optional[DataFrame] = None):
          if df is None:
@@ -456,8 +460,8 @@
          enable = False
 
          # first take from job options, then from step options
-         enable_job = self.options.table.get_boolean("liquid_clustering", None)
-         enable_step = self.step_conf.get("table_options", {}).get("liquid_clustering", None)
+         enable_job = self.table_options.liquid_clustering if self.table_options else None
+         enable_step = self.step_conf.table_options.liquid_clustering if self.step_conf.table_options else None
          if enable_job is not None:
              enable = enable_job
          elif enable_step: