fabricks-3.0.11-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (176)
  1. fabricks/__init__.py +0 -0
  2. fabricks/api/__init__.py +11 -0
  3. fabricks/api/cdc/__init__.py +6 -0
  4. fabricks/api/cdc/nocdc.py +3 -0
  5. fabricks/api/cdc/scd1.py +3 -0
  6. fabricks/api/cdc/scd2.py +3 -0
  7. fabricks/api/context.py +27 -0
  8. fabricks/api/core.py +4 -0
  9. fabricks/api/deploy.py +3 -0
  10. fabricks/api/exceptions.py +19 -0
  11. fabricks/api/extenders.py +3 -0
  12. fabricks/api/job_schema.py +3 -0
  13. fabricks/api/log.py +3 -0
  14. fabricks/api/masks.py +3 -0
  15. fabricks/api/metastore/__init__.py +10 -0
  16. fabricks/api/metastore/database.py +3 -0
  17. fabricks/api/metastore/table.py +3 -0
  18. fabricks/api/metastore/view.py +6 -0
  19. fabricks/api/notebooks/__init__.py +0 -0
  20. fabricks/api/notebooks/cluster.py +6 -0
  21. fabricks/api/notebooks/initialize.py +42 -0
  22. fabricks/api/notebooks/process.py +54 -0
  23. fabricks/api/notebooks/run.py +59 -0
  24. fabricks/api/notebooks/schedule.py +75 -0
  25. fabricks/api/notebooks/terminate.py +31 -0
  26. fabricks/api/parsers.py +3 -0
  27. fabricks/api/schedules.py +3 -0
  28. fabricks/api/udfs.py +3 -0
  29. fabricks/api/utils.py +9 -0
  30. fabricks/api/version.py +3 -0
  31. fabricks/api/views.py +6 -0
  32. fabricks/cdc/__init__.py +14 -0
  33. fabricks/cdc/base/__init__.py +4 -0
  34. fabricks/cdc/base/_types.py +10 -0
  35. fabricks/cdc/base/cdc.py +5 -0
  36. fabricks/cdc/base/configurator.py +223 -0
  37. fabricks/cdc/base/generator.py +177 -0
  38. fabricks/cdc/base/merger.py +110 -0
  39. fabricks/cdc/base/processor.py +471 -0
  40. fabricks/cdc/cdc.py +5 -0
  41. fabricks/cdc/nocdc.py +20 -0
  42. fabricks/cdc/scd.py +22 -0
  43. fabricks/cdc/scd1.py +15 -0
  44. fabricks/cdc/scd2.py +15 -0
  45. fabricks/cdc/templates/__init__.py +0 -0
  46. fabricks/cdc/templates/ctes/base.sql.jinja +35 -0
  47. fabricks/cdc/templates/ctes/current.sql.jinja +28 -0
  48. fabricks/cdc/templates/ctes/deduplicate_hash.sql.jinja +32 -0
  49. fabricks/cdc/templates/ctes/deduplicate_key.sql.jinja +31 -0
  50. fabricks/cdc/templates/ctes/rectify.sql.jinja +113 -0
  51. fabricks/cdc/templates/ctes/slice.sql.jinja +1 -0
  52. fabricks/cdc/templates/filter.sql.jinja +4 -0
  53. fabricks/cdc/templates/filters/final.sql.jinja +4 -0
  54. fabricks/cdc/templates/filters/latest.sql.jinja +17 -0
  55. fabricks/cdc/templates/filters/update.sql.jinja +30 -0
  56. fabricks/cdc/templates/macros/bactick.sql.jinja +1 -0
  57. fabricks/cdc/templates/macros/hash.sql.jinja +18 -0
  58. fabricks/cdc/templates/merge.sql.jinja +3 -0
  59. fabricks/cdc/templates/merges/nocdc.sql.jinja +41 -0
  60. fabricks/cdc/templates/merges/scd1.sql.jinja +73 -0
  61. fabricks/cdc/templates/merges/scd2.sql.jinja +54 -0
  62. fabricks/cdc/templates/queries/__init__.py +0 -0
  63. fabricks/cdc/templates/queries/context.sql.jinja +186 -0
  64. fabricks/cdc/templates/queries/final.sql.jinja +1 -0
  65. fabricks/cdc/templates/queries/nocdc/complete.sql.jinja +10 -0
  66. fabricks/cdc/templates/queries/nocdc/update.sql.jinja +34 -0
  67. fabricks/cdc/templates/queries/scd1.sql.jinja +85 -0
  68. fabricks/cdc/templates/queries/scd2.sql.jinja +98 -0
  69. fabricks/cdc/templates/query.sql.jinja +15 -0
  70. fabricks/context/__init__.py +72 -0
  71. fabricks/context/_types.py +133 -0
  72. fabricks/context/config/__init__.py +92 -0
  73. fabricks/context/config/utils.py +53 -0
  74. fabricks/context/log.py +77 -0
  75. fabricks/context/runtime.py +117 -0
  76. fabricks/context/secret.py +103 -0
  77. fabricks/context/spark_session.py +82 -0
  78. fabricks/context/utils.py +80 -0
  79. fabricks/core/__init__.py +4 -0
  80. fabricks/core/dags/__init__.py +9 -0
  81. fabricks/core/dags/base.py +99 -0
  82. fabricks/core/dags/generator.py +157 -0
  83. fabricks/core/dags/log.py +12 -0
  84. fabricks/core/dags/processor.py +228 -0
  85. fabricks/core/dags/run.py +39 -0
  86. fabricks/core/dags/terminator.py +25 -0
  87. fabricks/core/dags/utils.py +54 -0
  88. fabricks/core/extenders.py +33 -0
  89. fabricks/core/job_schema.py +32 -0
  90. fabricks/core/jobs/__init__.py +21 -0
  91. fabricks/core/jobs/base/__init__.py +10 -0
  92. fabricks/core/jobs/base/_types.py +284 -0
  93. fabricks/core/jobs/base/checker.py +139 -0
  94. fabricks/core/jobs/base/configurator.py +306 -0
  95. fabricks/core/jobs/base/exception.py +85 -0
  96. fabricks/core/jobs/base/generator.py +447 -0
  97. fabricks/core/jobs/base/invoker.py +206 -0
  98. fabricks/core/jobs/base/job.py +5 -0
  99. fabricks/core/jobs/base/processor.py +249 -0
  100. fabricks/core/jobs/bronze.py +395 -0
  101. fabricks/core/jobs/get_job.py +127 -0
  102. fabricks/core/jobs/get_job_conf.py +152 -0
  103. fabricks/core/jobs/get_job_id.py +31 -0
  104. fabricks/core/jobs/get_jobs.py +107 -0
  105. fabricks/core/jobs/get_schedule.py +10 -0
  106. fabricks/core/jobs/get_schedules.py +32 -0
  107. fabricks/core/jobs/gold.py +415 -0
  108. fabricks/core/jobs/silver.py +373 -0
  109. fabricks/core/masks.py +52 -0
  110. fabricks/core/parsers/__init__.py +12 -0
  111. fabricks/core/parsers/_types.py +6 -0
  112. fabricks/core/parsers/base.py +95 -0
  113. fabricks/core/parsers/decorator.py +11 -0
  114. fabricks/core/parsers/get_parser.py +26 -0
  115. fabricks/core/parsers/utils.py +69 -0
  116. fabricks/core/schedules/__init__.py +14 -0
  117. fabricks/core/schedules/diagrams.py +21 -0
  118. fabricks/core/schedules/generate.py +20 -0
  119. fabricks/core/schedules/get_schedule.py +5 -0
  120. fabricks/core/schedules/get_schedules.py +9 -0
  121. fabricks/core/schedules/process.py +9 -0
  122. fabricks/core/schedules/run.py +3 -0
  123. fabricks/core/schedules/terminate.py +6 -0
  124. fabricks/core/schedules/views.py +61 -0
  125. fabricks/core/steps/__init__.py +4 -0
  126. fabricks/core/steps/_types.py +7 -0
  127. fabricks/core/steps/base.py +423 -0
  128. fabricks/core/steps/get_step.py +10 -0
  129. fabricks/core/steps/get_step_conf.py +26 -0
  130. fabricks/core/udfs.py +106 -0
  131. fabricks/core/views.py +41 -0
  132. fabricks/deploy/__init__.py +92 -0
  133. fabricks/deploy/masks.py +8 -0
  134. fabricks/deploy/notebooks.py +71 -0
  135. fabricks/deploy/schedules.py +10 -0
  136. fabricks/deploy/tables.py +82 -0
  137. fabricks/deploy/udfs.py +19 -0
  138. fabricks/deploy/utils.py +36 -0
  139. fabricks/deploy/views.py +509 -0
  140. fabricks/metastore/README.md +3 -0
  141. fabricks/metastore/__init__.py +5 -0
  142. fabricks/metastore/_types.py +65 -0
  143. fabricks/metastore/database.py +65 -0
  144. fabricks/metastore/dbobject.py +66 -0
  145. fabricks/metastore/pyproject.toml +20 -0
  146. fabricks/metastore/table.py +768 -0
  147. fabricks/metastore/utils.py +51 -0
  148. fabricks/metastore/view.py +53 -0
  149. fabricks/utils/__init__.py +0 -0
  150. fabricks/utils/_types.py +6 -0
  151. fabricks/utils/azure_queue.py +93 -0
  152. fabricks/utils/azure_table.py +154 -0
  153. fabricks/utils/console.py +51 -0
  154. fabricks/utils/fdict.py +240 -0
  155. fabricks/utils/helpers.py +228 -0
  156. fabricks/utils/log.py +236 -0
  157. fabricks/utils/mermaid.py +32 -0
  158. fabricks/utils/path.py +242 -0
  159. fabricks/utils/pip.py +61 -0
  160. fabricks/utils/pydantic.py +94 -0
  161. fabricks/utils/read/__init__.py +11 -0
  162. fabricks/utils/read/_types.py +3 -0
  163. fabricks/utils/read/read.py +305 -0
  164. fabricks/utils/read/read_excel.py +5 -0
  165. fabricks/utils/read/read_yaml.py +33 -0
  166. fabricks/utils/schema/__init__.py +7 -0
  167. fabricks/utils/schema/get_json_schema_for_type.py +161 -0
  168. fabricks/utils/schema/get_schema_for_type.py +99 -0
  169. fabricks/utils/spark.py +76 -0
  170. fabricks/utils/sqlglot.py +56 -0
  171. fabricks/utils/write/__init__.py +8 -0
  172. fabricks/utils/write/delta.py +46 -0
  173. fabricks/utils/write/stream.py +27 -0
  174. fabricks-3.0.11.dist-info/METADATA +23 -0
  175. fabricks-3.0.11.dist-info/RECORD +176 -0
  176. fabricks-3.0.11.dist-info/WHEEL +4 -0
fabricks/core/udfs.py ADDED
@@ -0,0 +1,106 @@
+ import importlib.util
+ import os
+ import re
+ from typing import Callable, List, Optional
+
+ from pyspark.sql import SparkSession
+
+ from fabricks.context import CATALOG, IS_UNITY_CATALOG, PATH_UDFS, SPARK
+ from fabricks.context.log import DEFAULT_LOGGER
+
+ UDFS: dict[str, Callable] = {}
+
+
+ def register_all_udfs(extension: Optional[str] = None):
+     """
+     Register all user-defined functions (UDFs).
+     """
+     DEFAULT_LOGGER.info("register udfs")
+
+     for udf in get_udfs(extension=extension):
+         split = udf.split(".")
+         try:
+             register_udf(udf=split[0], extension=split[1])
+         except Exception as e:
+             DEFAULT_LOGGER.exception(f"could not register udf {udf}", exc_info=e)
+
+
+ def get_udfs(extension: Optional[str] = None) -> List[str]:
+     files = [os.path.basename(f) for f in PATH_UDFS.walk()]
+     udfs = [f for f in files if not str(f).endswith("__init__.py") and not str(f).endswith(".requirements.txt")]
+     if extension:
+         udfs = [f for f in udfs if f.endswith(f".{extension}")]
+     return udfs
+
+
+ def get_extension(udf: str) -> str:
+     for u in get_udfs():
+         r = re.compile(rf"{udf}(\.py|\.sql)")
+         if re.match(r, u):
+             return u.split(".")[1]
+
+     raise ValueError(f"{udf} not found")
+
+
+ def is_registered(udf: str, spark: Optional[SparkSession] = None) -> bool:
+     if spark is None:
+         spark = SPARK
+     assert spark is not None
+
+     df = spark.sql("show user functions in default")
+
+     if CATALOG:
+         df = df.where(f"function == '{CATALOG}.default.udf_{udf}'")
+     else:
+         df = df.where(f"function == 'spark_catalog.default.udf_{udf}'")
+
+     return not df.isEmpty()
+
+
+ def register_udf(udf: str, extension: Optional[str] = None, spark: Optional[SparkSession] = None):
+     """
+     Register a user-defined function (UDF).
+     """
+     if spark is None:
+         spark = SPARK
+     assert spark is not None
+
+     if not is_registered(udf, spark):
+         DEFAULT_LOGGER.debug(f"register udf {udf}")
+
+         if extension is None:
+             extension = get_extension(udf)
+
+         assert extension
+
+         path = PATH_UDFS.joinpath(f"{udf}.{extension}")
+
+         if extension == "sql":
+             spark.sql(path.get_sql())
+
+         elif extension == "py":
+             if not IS_UNITY_CATALOG:
+                 assert path.exists(), f"udf not found ({path.string})"
+             else:
+                 DEFAULT_LOGGER.debug(f"could not check if udf exists ({path.string})")
+
+             spec = importlib.util.spec_from_file_location(udf, path.string)
+             assert spec, f"no valid udf found ({path.string})"
+             assert spec.loader is not None
+
+             mod = importlib.util.module_from_spec(spec)
+             spec.loader.exec_module(mod)
+
+             u = UDFS[udf]
+             u(spark)
+
+         else:
+             raise ValueError(f"{udf} not found")
+
+
+ def udf(name: str):
+     def decorator(fn: Callable):
+         UDFS[name] = fn
+         return fn
+
+     return decorator
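Note: the udf decorator and register_udf together define the contract for Python UDF files. register_udf executes the module found at PATH_UDFS/<name>.py, then calls the function that module stored in UDFS under that name, passing it the Spark session. A minimal sketch of such a file follows; the file name add_one.py and its logic are hypothetical, not shipped with the package.

# Hypothetical PATH_UDFS/add_one.py (a sketch, not part of the wheel)
from pyspark.sql import SparkSession
from pyspark.sql.types import IntegerType

from fabricks.core.udfs import udf


@udf("add_one")  # stores this callable in UDFS["add_one"]
def register(spark: SparkSession):
    # register_udf("add_one") executes this module, then calls UDFS["add_one"](spark);
    # the callable is expected to register the Spark function itself, and the
    # "udf_" prefix matches the name that is_registered() checks for.
    spark.udf.register("udf_add_one", lambda x: x + 1, IntegerType())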
fabricks/core/views.py ADDED
@@ -0,0 +1,41 @@
+ from fabricks.context import PATH_VIEWS, SPARK
+ from fabricks.context.log import DEFAULT_LOGGER
+ from fabricks.utils.path import Path
+ from fabricks.utils.sqlglot import fix as fix_sql
+
+
+ def create_or_replace_view_internal(path: Path):
+     sql = path.get_sql()
+     file_name = path.get_file_name().split(".")[0]
+
+     try:
+         sql = f"""
+         create or replace view fabricks.{file_name}
+         as
+         {sql}
+         """
+         sql = fix_sql(sql)
+         DEFAULT_LOGGER.debug("create or replace (custom) view", extra={"label": f"fabricks.{file_name}", "sql": sql})
+
+         SPARK.sql(sql)
+
+     except Exception as e:
+         DEFAULT_LOGGER.exception(
+             "could not create nor replace (custom) view", extra={"label": f"fabricks.{file_name}", "exc_info": e}
+         )
+         raise e
+
+
+ def create_or_replace_view(name: str):
+     p = PATH_VIEWS.joinpath(f"{name}.sql")
+     create_or_replace_view_internal(p)
+
+
+ def create_or_replace_views():
+     DEFAULT_LOGGER.info("create or replace (custom) views")
+
+     for p in PATH_VIEWS.walk(file_format="sql", convert=True):
+         try:
+             create_or_replace_view_internal(p)
+         except Exception:
+             pass
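Note: a usage sketch of the two entry points, assuming a hypothetical my_view.sql under PATH_VIEWS whose body is a plain select statement.

# A sketch; "my_view" is an illustrative file name.
from fabricks.core.views import create_or_replace_view, create_or_replace_views

create_or_replace_view("my_view")  # runs: create or replace view fabricks.my_view as <file contents>
create_or_replace_views()  # same, for every *.sql file found under PATH_VIEWS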
fabricks/deploy/__init__.py ADDED
@@ -0,0 +1,92 @@
+ import logging
+ from typing import List, Optional, Union, cast
+
+ from fabricks.context import FABRICKS_STORAGE
+ from fabricks.context.log import DEFAULT_LOGGER
+ from fabricks.core.jobs.base._types import Steps, TStep
+ from fabricks.core.steps.base import BaseStep
+ from fabricks.deploy.masks import deploy_masks
+ from fabricks.deploy.notebooks import deploy_notebooks
+ from fabricks.deploy.schedules import deploy_schedules
+ from fabricks.deploy.tables import deploy_tables
+ from fabricks.deploy.udfs import deploy_udfs
+ from fabricks.deploy.utils import print_atomic_bomb
+ from fabricks.deploy.views import deploy_views
+ from fabricks.metastore.database import Database
+
+
+ class Deploy:
+     @staticmethod
+     def tables(drop: bool = False):
+         deploy_tables(drop=drop)
+
+     @staticmethod
+     def views():
+         deploy_views()
+
+     @staticmethod
+     def udfs():
+         deploy_udfs()
+
+     @staticmethod
+     def masks():
+         deploy_masks()
+
+     @staticmethod
+     def notebooks():
+         deploy_notebooks()
+
+     @staticmethod
+     def schedules():
+         deploy_schedules()
+
+     @staticmethod
+     def armageddon(steps: Optional[Union[TStep, List[TStep], str, List[str]]], nowait: bool = False):
+         DEFAULT_LOGGER.warning("!💥 armageddon 💥!")
+         print_atomic_bomb(nowait=nowait)
+
+         DEFAULT_LOGGER.setLevel(logging.INFO)
+
+         if steps is None:
+             steps = Steps
+         assert steps is not None
+
+         if isinstance(steps, str):
+             steps = [cast(TStep, steps)]
+         elif isinstance(steps, List):
+             steps = [cast(TStep, s) for s in steps]
+         elif isinstance(steps, TStep):
+             steps = [steps]
+
+         fabricks = Database("fabricks")
+         fabricks.drop()
+
+         for s in steps:
+             step = BaseStep(s)
+             step.drop()
+
+         tmp = FABRICKS_STORAGE.joinpath("tmp")
+         tmp.rm()
+
+         checkpoint = FABRICKS_STORAGE.joinpath("checkpoints")
+         checkpoint.rm()
+
+         schema = FABRICKS_STORAGE.joinpath("schemas")
+         schema.rm()
+
+         schedule = FABRICKS_STORAGE.joinpath("schedules")
+         schedule.rm()
+
+         fabricks.create()
+
+         Deploy.tables(drop=True)
+         Deploy.udfs()
+         Deploy.masks()
+         Deploy.notebooks()
+
+         for s in steps:
+             step = BaseStep(s)
+             step.create()
+
+         Deploy.views()
+         Deploy.schedules()
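Note: a usage sketch of the entry points above; the step names are hypothetical and must exist in the runtime configuration. armageddon is destructive by design: it drops the fabricks database, the given steps, and the tmp/checkpoints/schemas/schedules storage folders before rebuilding everything.

# A sketch, assuming "bronze" and "silver" are configured steps.
from fabricks.deploy import Deploy

Deploy.tables()  # create the fabricks.* tables if missing
Deploy.notebooks()  # overwrite the bundled notebooks in the workspace

# full, destructive rebuild of the given steps (prints the bomb first):
Deploy.armageddon(steps=["bronze", "silver"], nowait=True)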
fabricks/deploy/masks.py ADDED
@@ -0,0 +1,8 @@
+ from fabricks.context.log import DEFAULT_LOGGER
+ from fabricks.core.masks import register_all_masks
+
+
+ def deploy_masks():
+     DEFAULT_LOGGER.info("create or replace masks")
+
+     register_all_masks()
fabricks/deploy/notebooks.py ADDED
@@ -0,0 +1,71 @@
+ import base64
+ import io
+ import os
+ from importlib import resources
+
+ from databricks.sdk import WorkspaceClient
+ from databricks.sdk.service import workspace
+
+ from fabricks.context import PATH_NOTEBOOKS
+ from fabricks.context.log import DEFAULT_LOGGER
+
+
+ def deploy_notebook(notebook: str):
+     from fabricks.api import notebooks
+
+     DEFAULT_LOGGER.debug(f"overwrite {notebook}")
+
+     w = WorkspaceClient()
+
+     target = f"{PATH_NOTEBOOKS}/{notebook}.py"
+     src = resources.files(notebooks) / f"{notebook}.py"
+
+     with io.open(src, "rb") as file:  # type: ignore
+         content = file.read()
+
+     encoded = base64.b64encode(content).decode("utf-8")
+
+     w.workspace.import_(
+         path=target,
+         content=encoded,
+         format=workspace.ImportFormat.AUTO,
+         language=workspace.Language.PYTHON,
+         overwrite=True,
+     )
+
+
+ def deploy_notebooks():
+     DEFAULT_LOGGER.info("overwrite notebooks")
+
+     _create_dir_if_not_exists()
+     _clean_dir()
+
+     for n in [
+         "cluster",
+         "initialize",
+         "process",
+         "schedule",
+         "run",
+         "terminate",
+     ]:
+         deploy_notebook(notebook=n)
+
+
+ def _create_dir_if_not_exists():
+     dir = str(PATH_NOTEBOOKS)
+     os.makedirs(dir, exist_ok=True)
+
+
+ def _clean_dir():
+     dir = str(PATH_NOTEBOOKS)
+     for n in [
+         "cluster",
+         "initialize",
+         "process",
+         "schedule",
+         "run",
+         "terminate",
+     ]:
+         file_path = os.path.join(dir, f"{n}.py")
+         if os.path.isfile(file_path):
+             os.remove(file_path)
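Note: a usage sketch. deploy_notebooks recreates the target directory, removes stale copies, and re-imports the six bundled notebooks from the fabricks.api.notebooks package resources; deploy_notebook overwrites a single one by name.

# A sketch of both entry points.
from fabricks.deploy.notebooks import deploy_notebook, deploy_notebooks

deploy_notebooks()  # clean PATH_NOTEBOOKS, then import all six notebooks
deploy_notebook(notebook="run")  # or overwrite a single notebook by name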
fabricks/deploy/schedules.py ADDED
@@ -0,0 +1,10 @@
+ from fabricks.context.log import DEFAULT_LOGGER
+ from fabricks.core.schedules import create_or_replace_views
+ from fabricks.core.views import create_or_replace_views as create_or_replace_custom_views
+
+
+ def deploy_schedules():
+     DEFAULT_LOGGER.info("create or replace schedules")
+
+     create_or_replace_custom_views()
+     create_or_replace_views()
fabricks/deploy/tables.py ADDED
@@ -0,0 +1,82 @@
+ from pyspark.sql.types import LongType, StringType, StructField, StructType, TimestampType
+
+ from fabricks.cdc import NoCDC
+ from fabricks.context import SPARK
+ from fabricks.context.log import DEFAULT_LOGGER
+ from fabricks.metastore.table import Table
+
+
+ def deploy_tables(drop: bool = False):
+     DEFAULT_LOGGER.info("create or replace fabricks (default) tables")
+
+     create_table_log(drop)
+     create_table_dummy(drop)
+     create_table_step(drop)
+
+
+ def create_table_step(drop: bool = False):
+     table = Table("fabricks", "steps")
+     if drop:
+         table.drop()
+
+     if not table.exists():
+         schema = StructType(
+             [
+                 StructField("step", StringType(), True),
+                 StructField("expand", StringType(), True),
+                 StructField("order", LongType(), True),
+             ]
+         )
+         table.create(schema=schema, partitioning=True, partition_by=["expand"])
+
+
+ def create_table_log(drop: bool = False):
+     table = Table("fabricks", "logs")
+     if drop:
+         table.drop()
+
+     if not table.exists():
+         schema = StructType(
+             [
+                 StructField("schedule_id", StringType(), True),
+                 StructField("schedule", StringType(), True),
+                 StructField("step", StringType(), True),
+                 StructField("job_id", StringType(), True),
+                 StructField("job", StringType(), True),
+                 StructField("notebook_id", StringType(), True),
+                 StructField("level", StringType(), True),
+                 StructField("status", StringType(), True),
+                 StructField("timestamp", TimestampType(), True),
+                 StructField(
+                     "exception",
+                     StructType(
+                         [
+                             StructField("type", StringType(), True),
+                             StructField("message", StringType(), True),
+                             StructField("traceback", StringType(), True),
+                         ]
+                     ),
+                     True,
+                 ),
+             ]
+         )
+         table.create(schema=schema, partitioning=True, partition_by=["schedule_id", "step"])
+
+
+ def create_table_dummy(drop: bool = False):
+     cdc = NoCDC("fabricks", "dummy")
+
+     if drop:
+         cdc.drop()
+
+     if not cdc.table.exists():
+         df = SPARK.sql(
+             """
+             select
+                 1 as __key,
+                 md5('1') as __hash,
+                 cast('1900-01-01' as timestamp) as __valid_from,
+                 cast('9999-12-31' as timestamp) as __valid_to
+             """
+         )
+         cdc.overwrite(df)
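Note: a quick sanity check of the seeded table, a sketch assuming an initialized Fabricks session and that NoCDC("fabricks", "dummy") materializes as fabricks.dummy.

# A sketch; the expected output follows from the seed query above.
from fabricks.context import SPARK

SPARK.sql("select * from fabricks.dummy").show(truncate=False)
# expected single row:
# __key = 1, __hash = md5('1'),
# __valid_from = 1900-01-01 00:00:00, __valid_to = 9999-12-31 00:00:00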
fabricks/deploy/udfs.py ADDED
@@ -0,0 +1,19 @@
+ from fabricks.context import SPARK
+ from fabricks.context.log import DEFAULT_LOGGER
+ from fabricks.core.udfs import register_all_udfs
+ from fabricks.utils.sqlglot import fix as fix_sql
+
+
+ def deploy_udfs():
+     DEFAULT_LOGGER.info("create or replace udfs")
+
+     register_all_udfs(extension="sql")
+     create_or_replace_udf_job_id()
+
+
+ def create_or_replace_udf_job_id():
+     sql = "create or replace function fabricks.udf_job_id(job string) returns string return md5(job)"
+     sql = fix_sql(sql)
+
+     DEFAULT_LOGGER.debug("create or replace fabricks.udf_job_id", extra={"sql": sql})
+     SPARK.sql(sql)
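Note: since fabricks.udf_job_id is just md5 over the job name, the same id can be computed in SQL or in plain Python; a sketch follows, and the job name in it is hypothetical.

# A sketch: both sides yield the same hex digest.
import hashlib

from fabricks.context import SPARK

job = "gold.fact.sales"  # hypothetical job name
via_sql = SPARK.sql(f"select fabricks.udf_job_id('{job}') as job_id").collect()[0].job_id
via_py = hashlib.md5(job.encode("utf-8")).hexdigest()
assert via_sql == via_py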
fabricks/deploy/utils.py ADDED
@@ -0,0 +1,36 @@
+ import time
+
+
+ def print_atomic_bomb(nowait: bool = False):
+     def print_and_wait(message: str):
+         if not nowait:
+             time.sleep(0.5)
+         print(message)
+
+     print("")
+     print_and_wait(" ⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⣀⡤⠤⠴⠾⠋⠉⠛⢾⡏⠙⠿⠦⠤⢤⣀⡀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀ ")
+     print_and_wait(" ⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⢀⡤⢶⣿⠉⢀⣀⡠⠆⠀⠀⠀⠀⠀⠀⠀⢤⣀⣀⠈⢹⣦⢤⡀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀ ")
+     print_and_wait(" ⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⢀⣴⣿⠁⢋⡙⠁⠀⡝⠀⠀⠀⠀⣀⡸⠋⠁⠀⠀⠹⡀⠀⠈⠈⠆⢹⢦⡀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀ ")
+     print_and_wait(" ⠀⠀⠀⠀⠀⠀⠀⢀⣠⣤⣿⣁⡡⣴⡏⠀⠀⠀⢀⠀⢧⣀⠄⠀⠀⠀⣀⣰⠆⢀⠁⠀⠀⢈⣶⡤⣀⢹⣦⣄⡀⠀⠀⠀⠀⠀⠀⠀⠀ ")
+     print_and_wait(" ⠀⠀⠀⠀⠀⣠⢴⠟⢁⡝⠀⠁⠀⠃⠉⠀⠀⠘⣯⠀⡀⠾⣤⣄⣠⢤⠾⠄⠀⣸⠖⠀⠀⠈⠀⠃⠀⠀⠹⡄⠙⣶⢤⡀⠀⠀⠀⠀⠀ ")
+     print_and_wait(" ⠀⠀⠀⣠⠾⡇⠈⣀⡞⠀⠀⠀⠀⡀⠀⢀⣠⣄⣇⠀⣳⠴⠃⠀⠀⠀⠣⢴⠉⣰⣇⣀⣀⠀⠀⡄⠀⠀⠀⢹⣄⡘⠈⡷⣦⠀⠀⠀⠀ ")
+     print_and_wait(" ⢠⠞⠉⢻⡄⠀⠀⠈⠙⠀⠀⠀⠀⠙⣶⣏⣤⣤⠟⠉⠁⠀⠀⠀⠀⠀⠀⠀⠉⠙⢦⣱⣌⣷⠊⠀⠀⠀⠀⠈⠁⠀⠀⠀⡝⠉⠻⣄⠀ ")
+     print_and_wait(" ⠛⢀⡠⢼⡇⠀⠀⢀⡄⠀⢀⣀⡽⠚⠁⠀⠀⠀⢠⡀⢠⣀⠠⣔⢁⡀⠀⣄⠀⡄⠀⠀⠀⠈⠑⠺⣄⡀⠀⠠⡀⠀⠀⢠⡧⠄⠀⠘⢧ ")
+     print_and_wait(" ⡶⠋⠀⠀⠈⣠⣈⣩⠗⠒⠋⠀⠀⠀⠀⣀⣠⣆⡼⣷⣞⠛⠻⡉⠉⡟⠒⡛⣶⠧⣀⣀⣀⠀⠀⠀⠀⠈⠓⠺⢏⣉⣠⠋⠀⠀⠀⢢⣸ ")
+     print_and_wait(" ⠇⠐⠤⠤⠖⠁⣿⣀⣀⠀⠀⠀⠀⠀⠉⠁⠈⠉⠙⠛⢿⣷⡄⢣⡼⠀⣾⣿⠧⠒⠓⠚⠛⠉⠀⠀⠀⠀⠀⢀⣀⣾⡉⠓⠤⡤⠄⠸⢿ ")
+     print_and_wait(" ⣆⣤⠀⠀⠠⠀⠈⠓⠈⠓⠤⡀⠀⠀⠀⠀⠀⠀⠀⠀⠈⣿⣿⢸⠀⢸⣿⠇⠀⠀⠀⠀⠀⠀⠀⠀⢀⡤⠒⠁⠰⠃⠀⠠⠀⠀⢀⣀⠞ ")
+     print_and_wait(" ⠀⠉⠓⢲⣄⡈⢀⣠⠀⠀⠀⡸⠶⠂⠀⠀⢀⠀⠀⠤⠞⢻⡇⠀⠀⢘⡟⠑⠤⠄⠀⢀⠀⠀⠐⠲⢿⡀⠀⠀⢤⣀⢈⣀⡴⠖⠋⠀⠀ ")
+     print_and_wait(" ⠀⠀⠀⠀⠈⠉⠉⠙⠓⠒⣾⣁⣀⣴⠀⣀⠙⢧⠂⢀⣆⣀⣷⣤⣀⣾⣇⣀⡆⠀⢢⠛⢁⠀⢰⣀⣀⣹⠒⠒⠛⠉⠉⠉⠀⠀⠀⠀⠀ ")
+     print_and_wait(" ⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠉⠁⠈⠉⠉⠛⠉⠙⠉⠀⠀⣿⡟⣿⣿⠀⠀⠈⠉⠉⠙⠋⠉⠉⠀⠉⠁⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀ ")
+     print_and_wait(" ⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⣿⡇⢻⣿⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀ ")
+     print_and_wait(" ⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⣀⣤⣶⣾⣿⣿⠁⠀⢹⡛⣟⡶⢤⣀⡀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀ ")
+     print_and_wait(" ⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⣴⠛⢯⣽⡟⢿⣿⠛⠿⠳⠞⠻⣿⠻⣆⢽⠟⣶⡀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀ ")
+     print_and_wait(" ⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠛⠃⠲⠯⠴⣦⣼⣷⣤⣤⣶⣤⣩⡧⠽⠷⠐⠛⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀ ")
+     print_and_wait(" ⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⢀⣿⡇⠀⣿⡆⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀ ")
+     print_and_wait(" ⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⢀⣀⣄⡀⢀⣀⣠⡾⡿⢡⢐⠻⣿⣄⣀⡀⠀⣀⣄⡀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀ ")
+     print_and_wait(" ⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⢀⣤⢴⡏⠁⠀⠝⠉⣡⠟⣰⠃⢸⣿⠀⣷⠙⢧⡉⠻⡅⠀⠙⡷⢤⣀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀ ")
+     print_and_wait(" ⠀⠀⠀⠀⠀⠀⠀⠀⠀⢀⣴⡟⠀⠈⣿⢄⡴⠞⠻⣄⣰⣡⠤⣞⣸⡤⢬⣧⣀⡿⠛⠦⣤⣶⡃⠀⢹⣦⡀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀ ")
+     print_and_wait(" ⠀⠀⠀⠀⠀⠀⢀⣴⣶⡿⠃⠉⢺⠁⠙⠒⠀⠀⣠⡉⠀⠉⠚⠉⠉⠑⠈⠀⠈⣧⠀⠀⠒⠋⠀⡹⠋⠀⢻⡶⠶⡄⠀⠀⠀⠀⠀⠀⠀ ")
+     print_and_wait(" ⠀⠀⠀⠀⠀⣠⣾⣿⣇⠁⢈⡦⠀⡍⠋⠁⡀⠸⡋⠀⠀⠀⢘⠏⠉⡏⠀⠀⠀⢉⡷⠀⡌⠉⠋⡇⠠⣏⠈⢁⣦⣿⣦⠀⠀⠀⠀⠀⠀ ")
+     print_and_wait(" ⠀⠀⠀⠀⠀⠉⣁⠀⠉⠉⠉⠙⠛⠛⠒⠚⠳⠤⢼⣤⣠⠤⣮⣠⣤⣼⠦⢤⣤⣿⠤⠾⠓⠒⠛⢓⠛⠉⠉⠉⠀⠈⠉⠀⠀⠀⠀⠀⠀ ")
+     print("")