fabricks 3.0.18__py3-none-any.whl → 4.0.0__py3-none-any.whl
This diff shows the changes between two publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
- fabricks/api/context.py +15 -3
- fabricks/api/notebooks/schedule.py +2 -3
- fabricks/api/parsers.py +2 -1
- fabricks/api/utils.py +3 -1
- fabricks/cdc/__init__.py +1 -2
- fabricks/cdc/base/__init__.py +1 -2
- fabricks/cdc/base/_types.py +5 -3
- fabricks/cdc/base/configurator.py +5 -0
- fabricks/cdc/base/generator.py +7 -3
- fabricks/cdc/base/merger.py +2 -0
- fabricks/cdc/base/processor.py +15 -0
- fabricks/cdc/templates/README.md +490 -0
- fabricks/cdc/templates/ctes/base.sql.jinja +1 -0
- fabricks/cdc/templates/ctes/current.sql.jinja +4 -0
- fabricks/cdc/templates/merges/scd1.sql.jinja +6 -0
- fabricks/cdc/templates/merges/scd2.sql.jinja +6 -0
- fabricks/cdc/templates/queries/context.sql.jinja +104 -96
- fabricks/cdc/templates/query.sql.jinja +1 -1
- fabricks/context/__init__.py +13 -1
- fabricks/context/config.py +13 -122
- fabricks/context/log.py +92 -1
- fabricks/context/runtime.py +35 -69
- fabricks/context/spark_session.py +8 -7
- fabricks/context/utils.py +26 -39
- fabricks/core/__init__.py +2 -2
- fabricks/core/dags/base.py +5 -5
- fabricks/core/dags/processor.py +2 -3
- fabricks/core/extenders.py +1 -1
- fabricks/core/job_schema.py +26 -16
- fabricks/core/jobs/__init__.py +1 -7
- fabricks/core/jobs/base/README.md +1545 -0
- fabricks/core/jobs/base/__init__.py +1 -8
- fabricks/core/jobs/base/checker.py +7 -7
- fabricks/core/jobs/base/configurator.py +142 -63
- fabricks/core/jobs/base/generator.py +38 -34
- fabricks/core/jobs/base/invoker.py +48 -63
- fabricks/core/jobs/base/processor.py +13 -28
- fabricks/core/jobs/bronze.py +88 -38
- fabricks/core/jobs/get_job.py +3 -6
- fabricks/core/jobs/get_job_conf.py +19 -68
- fabricks/core/jobs/get_jobs.py +10 -11
- fabricks/core/jobs/get_schedules.py +3 -17
- fabricks/core/jobs/gold.py +96 -43
- fabricks/core/jobs/silver.py +42 -22
- fabricks/core/masks.py +11 -8
- fabricks/core/parsers/__init__.py +0 -2
- fabricks/core/parsers/base.py +10 -10
- fabricks/core/parsers/decorator.py +1 -1
- fabricks/core/parsers/get_parser.py +4 -5
- fabricks/core/schedules/process.py +1 -4
- fabricks/core/steps/base.py +27 -17
- fabricks/core/steps/get_step.py +2 -4
- fabricks/core/steps/get_step_conf.py +3 -7
- fabricks/core/udfs.py +9 -8
- fabricks/core/views.py +2 -2
- fabricks/deploy/__init__.py +27 -16
- fabricks/deploy/masks.py +1 -1
- fabricks/deploy/notebooks.py +19 -16
- fabricks/deploy/schedules.py +1 -1
- fabricks/deploy/tables.py +66 -49
- fabricks/deploy/udfs.py +2 -2
- fabricks/deploy/views.py +15 -16
- fabricks/metastore/database.py +3 -3
- fabricks/metastore/table.py +103 -68
- fabricks/models/__init__.py +125 -0
- fabricks/models/common.py +79 -0
- fabricks/models/config.py +225 -0
- fabricks/models/dependency.py +50 -0
- fabricks/models/job.py +157 -0
- fabricks/models/path.py +17 -0
- fabricks/models/runtime.py +182 -0
- fabricks/models/schedule.py +21 -0
- fabricks/models/step.py +103 -0
- fabricks/models/table.py +77 -0
- fabricks/{core/jobs/get_job_id.py → models/utils.py} +2 -0
- fabricks/utils/helpers.py +6 -5
- fabricks/utils/log.py +25 -6
- fabricks/utils/path.py +269 -102
- fabricks/utils/pip.py +7 -7
- fabricks/utils/read/read.py +23 -22
- fabricks/utils/read/read_yaml.py +2 -2
- fabricks/utils/write/delta.py +4 -4
- fabricks/utils/write/stream.py +2 -2
- {fabricks-3.0.18.dist-info → fabricks-4.0.0.dist-info}/METADATA +9 -4
- {fabricks-3.0.18.dist-info → fabricks-4.0.0.dist-info}/RECORD +86 -83
- fabricks/context/_types.py +0 -137
- fabricks/context/helpers.py +0 -63
- fabricks/core/jobs/base/_types.py +0 -284
- fabricks/core/parsers/_types.py +0 -6
- fabricks/utils/fdict.py +0 -240
- fabricks/utils/pydantic.py +0 -94
- fabricks/utils/schema/__init__.py +0 -7
- fabricks/utils/schema/get_json_schema_for_type.py +0 -161
- fabricks/utils/schema/get_schema_for_type.py +0 -99
- {fabricks-3.0.18.dist-info → fabricks-4.0.0.dist-info}/WHEEL +0 -0
fabricks/core/parsers/get_parser.py
CHANGED

@@ -1,12 +1,12 @@
-from typing import Optional
+from typing import Callable, Optional

 from fabricks.context import PATH_PARSERS
-from fabricks.core.parsers._types import ParserOptions
 from fabricks.core.parsers.base import PARSERS, BaseParser
+from fabricks.models import ParserOptions
 from fabricks.utils.helpers import load_module_from_path


-def get_parser(name: str, parser_options: Optional[ParserOptions] = None) -> BaseParser:
+def get_parser(name: str, parser_options: Optional[ParserOptions] = None) -> Callable:
     if name not in ["json", "parquet", "avro", "csv", "tsv", "delta", "table"]:
         path = PATH_PARSERS.joinpath(name).append(".py")
         assert path.exists(), f"parser not found ({path})"

@@ -17,5 +17,4 @@ def get_parser(name: str, parser_options: Optional[ParserOptions] = None) -> BaseParser:
     else:
         parser = BaseParser(parser_options, name)

-
-    return parser
+    return parser.get_data
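The contract change above is easy to miss: get_parser now returns the parser's get_data callable rather than the BaseParser instance itself. A minimal usage sketch (the argument list of get_data is not shown in this diff, so it is left abstract):

    from fabricks.core.parsers.get_parser import get_parser

    parse = get_parser("json")  # now a Callable (parser.get_data), not a BaseParser
    # df = parse(...)           # call sites that previously did parser.get_data(...)
    #                           # now invoke the returned callable directly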
fabricks/core/schedules/process.py
CHANGED

@@ -1,9 +1,6 @@
-from typing import Union
-
 from fabricks.core.dags.processor import DagProcessor
-from fabricks.core.jobs.base._types import TStep


-def process(schedule_id: str, schedule: str, step: Union[TStep, str]):
+def process(schedule_id: str, schedule: str, step: str):
     with DagProcessor(schedule_id=schedule_id, schedule=schedule, step=step) as p:
         p.process()
fabricks/core/steps/base.py
CHANGED
@@ -4,24 +4,34 @@ from typing import Dict, Iterable, List, Literal, Optional, Tuple, Union, cast
 from pyspark.sql import DataFrame
 from pyspark.sql.functions import expr, md5
 from pyspark.sql.types import Row
+from sparkdantic import create_spark_schema
 from typing_extensions import deprecated

 from fabricks.cdc import NoCDC
-from fabricks.context import
+from fabricks.context import (
+    CONF_RUNTIME,
+    LOGLEVEL,
+    PATHS_RUNTIME,
+    PATHS_STORAGE,
+    SPARK,
+    STEPS,
+    Bronzes,
+    Golds,
+    Silvers,
+)
 from fabricks.context.log import DEFAULT_LOGGER
-from fabricks.core.jobs.base._types import Bronzes, Golds, SchemaDependencies, Silvers, TStep
 from fabricks.core.jobs.get_job import get_job
 from fabricks.core.steps._types import Timeouts
 from fabricks.core.steps.get_step_conf import get_step_conf
 from fabricks.metastore.database import Database
 from fabricks.metastore.table import Table
+from fabricks.models import SchemaDependencies, StepBronzeOptions, StepGoldOptions, StepSilverOptions
 from fabricks.utils.helpers import run_in_parallel
 from fabricks.utils.read.read_yaml import read_yaml
-from fabricks.utils.schema import get_schema_for_type


 class BaseStep:
-    def __init__(self, step:
+    def __init__(self, step: str):
         self.name = cast(str, step)

         if self.name in Bronzes:

@@ -45,7 +55,7 @@ class BaseStep:
         self.database = Database(self.name)

     _conf: Optional[dict] = None
-    _options: Optional[
+    _options: Optional[Union[StepBronzeOptions, StepSilverOptions, StepGoldOptions]] = None

     _workers: Optional[int] = None
     _timeouts: Optional[Timeouts] = None

@@ -53,18 +63,18 @@ class BaseStep:
     @property
     def workers(self):
         if not self._workers:
-            w = self.options.
+            w = self.options.workers
             if w is None:
-                w = CONF_RUNTIME.
+                w = CONF_RUNTIME.options.workers
             assert w is not None
             self._workers = cast(int, w)

         return self._workers

     def _get_timeout(self, what: str) -> int:
-        t = self.options.
+        t = getattr(self.options.timeouts, what, None)
         if t is None:
-            t = CONF_RUNTIME.
+            t = getattr(CONF_RUNTIME.options.timeouts, what)
         assert t is not None

         return int(t)

@@ -82,18 +92,18 @@ class BaseStep:
     @property
     def conf(self) -> dict:
         if not self._conf:
-            _conf = [s for s in STEPS if s.
+            _conf = [s for s in STEPS if s.name == self.name][0]
             assert _conf is not None
-            self._conf =
+            self._conf = _conf.model_dump()

         return self._conf

     @property
-    def options(self)
+    def options(self):
         if not self._options:
-
-            assert
-            self._options =
+            _step = [s for s in STEPS if s.name == self.name][0]
+            assert _step is not None
+            self._options = _step.options

         return self._options

@@ -209,7 +219,7 @@ class BaseStep:

         try:
             conf = get_step_conf(self.name)
-            schema =
+            schema = create_spark_schema(conf)
             jobs = self.get_jobs_iter(topic=topic)

             df = SPARK.createDataFrame(jobs, schema=schema)  # type: ignore

@@ -392,7 +402,7 @@ class BaseStep:
         DEFAULT_LOGGER.setLevel(LOGLEVEL)

     def update_steps_list(self):
-        order = self.options.
+        order = self.options.order or 0
         df = SPARK.sql(f"select '{self.expand}' as expand, '{self.name}' as step, '{order}' :: int as `order`")

         NoCDC("fabricks", "steps").delete_missing(df, keys=["step"], update_where=f"step = '{self.name}'")
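Two refactors run through base.py: step options become pydantic models from fabricks.models, and the removed hand-rolled get_schema_for_type gives way to sparkdantic's create_spark_schema, which derives a Spark StructType from a pydantic model. A sketch of that call with an illustrative model (not fabricks' actual step conf):

    from typing import Optional

    from pydantic import BaseModel
    from sparkdantic import create_spark_schema


    class JobRow(BaseModel):  # hypothetical model, for illustration only
        job_id: str
        topic: str
        order: Optional[int] = None


    schema = create_spark_schema(JobRow)  # -> pyspark.sql.types.StructType
    # SPARK.createDataFrame(jobs, schema=schema), as in the @@ -209 hunk above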
fabricks/core/steps/get_step.py
CHANGED
@@ -1,10 +1,8 @@
-from
-
-from fabricks.core.jobs.base._types import Steps, TStep
+from fabricks.context import Steps
 from fabricks.core.steps.base import BaseStep


-def get_step(step:
+def get_step(step: str) -> BaseStep:
     assert step in Steps, f"{step} not found"
     base_step = BaseStep(step=step)
     return base_step
fabricks/core/steps/get_step_conf.py
CHANGED

@@ -1,12 +1,8 @@
-from
+from fabricks.context import Bronzes, Golds, Silvers
+from fabricks.models import JobConfBronze, JobConfGold, JobConfSilver

-from fabricks.core.jobs.base._types import Bronzes, Golds, JobConfBronze, JobConfGold, JobConfSilver, Silvers, TStep
-
-
-def get_step_conf(step: Union[TStep, str]):
-    if isinstance(step, str):
-        step = cast(TStep, step)

+def get_step_conf(step: str):
     if step in Bronzes:
         expand = "bronze"
     elif step in Silvers:
fabricks/core/udfs.py
CHANGED
@@ -5,26 +5,27 @@ from typing import Callable, List, Optional

 from pyspark.sql import SparkSession

-from fabricks.context import CATALOG, IS_UNITY_CATALOG, PATH_UDFS, SPARK
+from fabricks.context import CATALOG, CONF_RUNTIME, IS_UNITY_CATALOG, PATH_UDFS, SPARK
 from fabricks.context.log import DEFAULT_LOGGER

 UDFS: dict[str, Callable] = {}

-
-
+UDF_SCHEMA = CONF_RUNTIME.udf_options.schema_name or "default" if CONF_RUNTIME.udf_options else "default"
+UDF_PREFIX = CONF_RUNTIME.udf_options.prefix or "udf_" if CONF_RUNTIME.udf_options else "udf_"
+

 def register_all_udfs(extension: Optional[str] = None, override: bool = False):
     """
     Register all user-defined functions (UDFs).
     """
-    DEFAULT_LOGGER.info("register udfs")
+    DEFAULT_LOGGER.info("register udfs", extra={"label": "fabricks"})

     for udf in get_udfs(extension=extension):
         split = udf.split(".")
         try:
             register_udf(udf=split[0], extension=split[1], override=override)
         except Exception as e:
-            DEFAULT_LOGGER.exception(f"could not register udf {udf}", exc_info=e)
+            DEFAULT_LOGGER.exception(f"could not register udf {udf}", exc_info=e, extra={"label": "fabricks"})


 def get_udfs(extension: Optional[str] = None) -> List[str]:

@@ -49,12 +50,12 @@ def is_registered(udf: str, spark: Optional[SparkSession] = None) -> bool:
         spark = SPARK
     assert spark is not None

-    df = spark.sql(f"show user functions in {
+    df = spark.sql(f"show user functions in {UDF_SCHEMA}")

     if CATALOG:
-        df = df.where(f"function == '{CATALOG}.{
+        df = df.where(f"function == '{CATALOG}.{UDF_SCHEMA}.{UDF_PREFIX}{udf}'")
     else:
-        df = df.where(f"function == 'spark_catalog.{
+        df = df.where(f"function == 'spark_catalog.{UDF_SCHEMA}.{UDF_PREFIX}{udf}'")

     return not df.isEmpty()
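The one-line fallbacks for UDF_SCHEMA and UDF_PREFIX lean on Python operator precedence: a conditional expression binds looser than or, so a or b if c else d parses as (a or b) if c else d. A small sketch of how both resolve when udf_options is unset:

    opts = None  # stand-in for CONF_RUNTIME.udf_options being unset

    schema_name = (opts.schema_name or "default") if opts else "default"
    prefix = (opts.prefix or "udf_") if opts else "udf_"

    assert (schema_name, prefix) == ("default", "udf_")  # opts attributes never touched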
fabricks/core/views.py
CHANGED
@@ -1,10 +1,10 @@
 from fabricks.context import PATH_VIEWS, SPARK
 from fabricks.context.log import DEFAULT_LOGGER
-from fabricks.utils.path import
+from fabricks.utils.path import GitPath
 from fabricks.utils.sqlglot import fix as fix_sql


-def create_or_replace_view_internal(path:
+def create_or_replace_view_internal(path: GitPath):
     sql = path.get_sql()
     file_name = path.get_file_name().split(".")[0]
fabricks/deploy/__init__.py
CHANGED
@@ -1,10 +1,9 @@
 import logging
-from typing import
+from typing import Optional, Union

-from fabricks.context import FABRICKS_STORAGE
+from fabricks.context import FABRICKS_STORAGE, Steps
 from fabricks.context.log import DEFAULT_LOGGER
-from fabricks.core.
-from fabricks.core.steps.base import BaseStep
+from fabricks.core.steps import get_step
 from fabricks.deploy.masks import deploy_masks
 from fabricks.deploy.notebooks import deploy_notebooks
 from fabricks.deploy.schedules import deploy_schedules

@@ -17,8 +16,8 @@ from fabricks.metastore.database import Database

 class Deploy:
     @staticmethod
-    def tables(drop: bool = False):
-        deploy_tables(drop=drop)
+    def tables(drop: bool = False, update: bool = False):
+        deploy_tables(drop=drop, update=update)

     @staticmethod
     def views():

@@ -33,16 +32,30 @@ class Deploy:
         deploy_masks(override=override)

     @staticmethod
-    def notebooks():
-        deploy_notebooks()
+    def notebooks(override: bool = False):
+        deploy_notebooks(overwrite=override)

     @staticmethod
     def schedules():
         deploy_schedules()

     @staticmethod
-    def
-
+    def step(step: str):
+        Deploy.tables()
+        s = get_step(step)
+        s.create()
+
+        Deploy.views()
+        Deploy.schedules()
+
+    @staticmethod
+    def job(step: str):
+        s = get_step(step)
+        s.create()
+
+    @staticmethod
+    def armageddon(steps: Optional[Union[str, list[str]]] = None, nowait: bool = False):
+        DEFAULT_LOGGER.warning("!💥 armageddon 💥!", extra={"label": "fabricks"})
         print_atomic_bomb(nowait=nowait)

         DEFAULT_LOGGER.setLevel(logging.INFO)

@@ -52,17 +65,15 @@ class Deploy:
         assert steps is not None

         if isinstance(steps, str):
-            steps = [cast(TStep, steps)]
-        elif isinstance(steps, List):
-            steps = [cast(TStep, s) for s in steps]
-        elif isinstance(steps, TStep):
             steps = [steps]
+        elif isinstance(steps, list):
+            steps = [s for s in steps]

         fabricks = Database("fabricks")
         fabricks.drop()

         for s in steps:
-            step =
+            step = get_step(s)
             step.drop()

         tmp = FABRICKS_STORAGE.joinpath("tmp")

@@ -85,7 +96,7 @@ class Deploy:
         Deploy.notebooks()

         for s in steps:
-            step =
+            step = get_step(s)
             step.create()

         Deploy.views()
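Taken together, the Deploy facade gains finer-grained entry points in 4.0.0. A hedged usage sketch based only on the signatures above ("silver" is an illustrative step name):

    from fabricks.deploy import Deploy

    Deploy.tables(update=True)       # keep existing tables, overwrite their schemas
    Deploy.notebooks(override=True)  # notebooks are now only overwritten on demand
    Deploy.step("silver")            # tables, then step.create(), then views + schedules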
fabricks/deploy/masks.py
CHANGED
fabricks/deploy/notebooks.py
CHANGED
@@ -13,7 +13,7 @@ from fabricks.context.log import DEFAULT_LOGGER
 def deploy_notebook(notebook: str):
     from fabricks.api import notebooks

-    DEFAULT_LOGGER.debug(f"overwrite {notebook}")
+    DEFAULT_LOGGER.debug(f"overwrite {notebook}", extra={"label": "fabricks"})

     w = WorkspaceClient()

@@ -34,21 +34,24 @@
     )


-def deploy_notebooks():
-    … (old body truncated in the source diff)
+def deploy_notebooks(overwrite: bool = False):
+    if overwrite:
+        DEFAULT_LOGGER.warning("overwrite notebooks", extra={"label": "fabricks"})
+
+        _create_dir_if_not_exists()
+        _clean_dir()
+
+        for n in [
+            "cluster",
+            "initialize",
+            "process",
+            "schedule",
+            "run",
+            "terminate",
+        ]:
+            deploy_notebook(notebook=n)
+    else:
+        DEFAULT_LOGGER.info("deploy notebooks skipped (overwrite=False)", extra={"label": "fabricks"})


 def _create_dir_if_not_exists():
CHANGED
|
@@ -4,7 +4,7 @@ from fabricks.core.views import create_or_replace_views as create_or_replace_cus
|
|
|
4
4
|
|
|
5
5
|
|
|
6
6
|
def deploy_schedules():
|
|
7
|
-
DEFAULT_LOGGER.info("create or replace schedules")
|
|
7
|
+
DEFAULT_LOGGER.info("create or replace schedules", extra={"label": "fabricks"})
|
|
8
8
|
|
|
9
9
|
create_or_replace_custom_views()
|
|
10
10
|
create_or_replace_views()
|
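This file, like most of the deploy and core modules in this release, changes only its log calls: every DEFAULT_LOGGER call now passes extra={"label": "fabricks"}. In stdlib logging, extra entries become attributes on the LogRecord, which a formatter or filter can read; presumably the reworked fabricks/context/log.py (+92 lines in this release) consumes the label that way. A minimal stdlib sketch of the mechanism:

    import logging

    handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter("%(label)s | %(message)s"))

    logger = logging.getLogger("demo")
    logger.addHandler(handler)
    logger.setLevel(logging.INFO)

    logger.info("create or replace schedules", extra={"label": "fabricks"})
    # prints: fabricks | create or replace schedules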
fabricks/deploy/tables.py
CHANGED
@@ -1,4 +1,4 @@
-from pyspark.sql.types import LongType, StringType, StructField, StructType, TimestampType
+from pyspark.sql.types import LongType, StringType, StructField, StructType, TimestampType, VariantType

 from fabricks.cdc import NoCDC
 from fabricks.context import SPARK

@@ -6,77 +6,94 @@ from fabricks.context.log import DEFAULT_LOGGER
 from fabricks.metastore.table import Table


-def deploy_tables(drop: bool = False):
-    DEFAULT_LOGGER.info("create or replace fabricks (default) tables")
+def deploy_tables(drop: bool = False, update: bool = False):
+    DEFAULT_LOGGER.info("create or replace fabricks (default) tables", extra={"label": "fabricks"})

-    create_table_log(drop=drop)
-    create_table_dummy(drop=drop)
-    create_table_step(drop=drop)
+    create_table_log(drop=drop, update=update)
+    create_table_dummy(drop=drop, update=update)
+    create_table_step(drop=drop, update=update)


-def create_table_step(drop: bool = False):
+def create_table_step(drop: bool = False, update: bool = False):
     table = Table("fabricks", "steps")
+    schema = StructType(
+        [
+            StructField("step", StringType(), True),
+            StructField("expand", StringType(), True),
+            StructField("order", LongType(), True),
+        ]
+    )
+
     if drop:
         table.drop()

     if not table.exists():
-        … (old inline schema truncated in the source diff)
-            StructField("order", LongType(), True),
-            ]
+        table.create(
+            schema=schema,
+            partitioning=True,
+            partition_by=["expand"],
         )
-
+    elif update:
+        table.overwrite_schema(schema=schema)


-def create_table_log(drop: bool = False):
+def create_table_log(drop: bool = False, update: bool = False):
     table = Table("fabricks", "logs")
+    schema = StructType(
+        [
+            StructField("schedule_id", StringType(), True),
+            StructField("schedule", StringType(), True),
+            StructField("step", StringType(), True),
+            StructField("job_id", StringType(), True),
+            StructField("job", StringType(), True),
+            StructField("notebook_id", StringType(), True),
+            StructField("level", StringType(), True),
+            StructField("status", StringType(), True),
+            StructField("timestamp", TimestampType(), True),
+            StructField(
+                "exception",
+                StructType(
+                    [
+                        StructField("type", StringType(), True),
+                        StructField("message", StringType(), True),
+                        StructField("traceback", StringType(), True),
+                    ]
+                ),
+                True,
+            ),
+            StructField("json", VariantType(), True),
+        ]
+    )
+
     if drop:
         table.drop()

     if not table.exists():
-        … (old inline schema truncated in the source diff)
-            StructField("step", StringType(), True),
-            StructField("job_id", StringType(), True),
-            StructField("job", StringType(), True),
-            StructField("notebook_id", StringType(), True),
-            StructField("level", StringType(), True),
-            StructField("status", StringType(), True),
-            StructField("timestamp", TimestampType(), True),
-            StructField(
-                "exception",
-                StructType(
-                    [
-                        StructField("type", StringType(), True),
-                        StructField("message", StringType(), True),
-                        StructField("traceback", StringType(), True),
-                    ]
-                ),
-                True,
-            ),
-            ]
+        table.create(
+            schema=schema,
+            partitioning=True,
+            partition_by=["schedule_id", "step"],
         )
-
+    elif update:
+        table.overwrite_schema(schema=schema)


-def create_table_dummy(drop: bool = False):
+def create_table_dummy(drop: bool = False, update: bool = False):
     cdc = NoCDC("fabricks", "dummy")
+    df = SPARK.sql(
+        """
+        select
+            1 as __key,
+            md5('1') as __hash,
+            cast('1900-01-01' as timestamp) as __valid_from,
+            cast('9999-12-31' as timestamp) as __valid_to
+        """
+    )

     if drop:
         cdc.drop()

     if not cdc.table.exists():
-        df = SPARK.sql(
-            """
-            select
-                1 as __key,
-                md5('1') as __hash,
-                cast('1900-01-01' as timestamp) as __valid_from,
-                cast('9999-12-31' as timestamp) as __valid_to
-            """
-        )
         cdc.overwrite(df)
+    elif update:
+        cdc.overwrite_schema(df)
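Two things stand out in tables.py: the logs table gains a json column typed VariantType, which requires a Spark runtime with VARIANT support (Spark 4.0 / recent Databricks runtimes), and each create_table_* function gains an update path that overwrites the schema of an existing table instead of requiring a drop. A sketch of the intended call:

    from fabricks.deploy.tables import deploy_tables

    # drop=False, update=True: existing tables are kept, but their schemas are
    # overwritten (table.overwrite_schema / cdc.overwrite_schema) so new columns
    # such as logs.json (VariantType) are picked up
    deploy_tables(update=True)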
fabricks/deploy/udfs.py
CHANGED
@@ -5,7 +5,7 @@ from fabricks.utils.sqlglot import fix as fix_sql


 def deploy_udfs(override: bool = True):
-    DEFAULT_LOGGER.info("create or replace udfs")
+    DEFAULT_LOGGER.info("create or replace udfs", extra={"label": "fabricks"})

     register_all_udfs(extension="sql", override=override)
     create_or_replace_udf_job_id()

@@ -15,5 +15,5 @@ def create_or_replace_udf_job_id():
     sql = "create or replace function fabricks.udf_job_id(job string) returns string return md5(job)"
     sql = fix_sql(sql)

-    DEFAULT_LOGGER.debug("create or replace fabricks.udf_job_id", extra={"sql": sql})
+    DEFAULT_LOGGER.debug("create or replace fabricks.udf_job_id", extra={"sql": sql, "label": "fabricks"})
     SPARK.sql(sql)