fabricks 3.0.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fabricks/__init__.py +0 -0
- fabricks/api/__init__.py +11 -0
- fabricks/api/cdc/__init__.py +6 -0
- fabricks/api/cdc/nocdc.py +3 -0
- fabricks/api/cdc/scd1.py +3 -0
- fabricks/api/cdc/scd2.py +3 -0
- fabricks/api/context.py +27 -0
- fabricks/api/core.py +4 -0
- fabricks/api/deploy.py +3 -0
- fabricks/api/exceptions.py +19 -0
- fabricks/api/extenders.py +3 -0
- fabricks/api/job_schema.py +3 -0
- fabricks/api/log.py +3 -0
- fabricks/api/masks.py +3 -0
- fabricks/api/metastore/__init__.py +10 -0
- fabricks/api/metastore/database.py +3 -0
- fabricks/api/metastore/table.py +3 -0
- fabricks/api/metastore/view.py +6 -0
- fabricks/api/notebooks/__init__.py +0 -0
- fabricks/api/notebooks/cluster.py +6 -0
- fabricks/api/notebooks/initialize.py +42 -0
- fabricks/api/notebooks/process.py +54 -0
- fabricks/api/notebooks/run.py +59 -0
- fabricks/api/notebooks/schedule.py +75 -0
- fabricks/api/notebooks/terminate.py +31 -0
- fabricks/api/parsers.py +3 -0
- fabricks/api/schedules.py +3 -0
- fabricks/api/udfs.py +3 -0
- fabricks/api/utils.py +9 -0
- fabricks/api/version.py +3 -0
- fabricks/api/views.py +6 -0
- fabricks/cdc/__init__.py +14 -0
- fabricks/cdc/base/__init__.py +4 -0
- fabricks/cdc/base/_types.py +10 -0
- fabricks/cdc/base/cdc.py +5 -0
- fabricks/cdc/base/configurator.py +223 -0
- fabricks/cdc/base/generator.py +177 -0
- fabricks/cdc/base/merger.py +110 -0
- fabricks/cdc/base/processor.py +471 -0
- fabricks/cdc/cdc.py +5 -0
- fabricks/cdc/nocdc.py +20 -0
- fabricks/cdc/scd.py +22 -0
- fabricks/cdc/scd1.py +15 -0
- fabricks/cdc/scd2.py +15 -0
- fabricks/cdc/templates/__init__.py +0 -0
- fabricks/cdc/templates/ctes/base.sql.jinja +35 -0
- fabricks/cdc/templates/ctes/current.sql.jinja +28 -0
- fabricks/cdc/templates/ctes/deduplicate_hash.sql.jinja +32 -0
- fabricks/cdc/templates/ctes/deduplicate_key.sql.jinja +31 -0
- fabricks/cdc/templates/ctes/rectify.sql.jinja +113 -0
- fabricks/cdc/templates/ctes/slice.sql.jinja +1 -0
- fabricks/cdc/templates/filter.sql.jinja +4 -0
- fabricks/cdc/templates/filters/final.sql.jinja +4 -0
- fabricks/cdc/templates/filters/latest.sql.jinja +17 -0
- fabricks/cdc/templates/filters/update.sql.jinja +30 -0
- fabricks/cdc/templates/macros/bactick.sql.jinja +1 -0
- fabricks/cdc/templates/macros/hash.sql.jinja +18 -0
- fabricks/cdc/templates/merge.sql.jinja +3 -0
- fabricks/cdc/templates/merges/nocdc.sql.jinja +41 -0
- fabricks/cdc/templates/merges/scd1.sql.jinja +73 -0
- fabricks/cdc/templates/merges/scd2.sql.jinja +54 -0
- fabricks/cdc/templates/queries/__init__.py +0 -0
- fabricks/cdc/templates/queries/context.sql.jinja +186 -0
- fabricks/cdc/templates/queries/final.sql.jinja +1 -0
- fabricks/cdc/templates/queries/nocdc/complete.sql.jinja +10 -0
- fabricks/cdc/templates/queries/nocdc/update.sql.jinja +34 -0
- fabricks/cdc/templates/queries/scd1.sql.jinja +85 -0
- fabricks/cdc/templates/queries/scd2.sql.jinja +98 -0
- fabricks/cdc/templates/query.sql.jinja +15 -0
- fabricks/context/__init__.py +72 -0
- fabricks/context/_types.py +133 -0
- fabricks/context/config/__init__.py +92 -0
- fabricks/context/config/utils.py +53 -0
- fabricks/context/log.py +77 -0
- fabricks/context/runtime.py +117 -0
- fabricks/context/secret.py +103 -0
- fabricks/context/spark_session.py +82 -0
- fabricks/context/utils.py +80 -0
- fabricks/core/__init__.py +4 -0
- fabricks/core/dags/__init__.py +9 -0
- fabricks/core/dags/base.py +99 -0
- fabricks/core/dags/generator.py +157 -0
- fabricks/core/dags/log.py +12 -0
- fabricks/core/dags/processor.py +228 -0
- fabricks/core/dags/run.py +39 -0
- fabricks/core/dags/terminator.py +25 -0
- fabricks/core/dags/utils.py +54 -0
- fabricks/core/extenders.py +33 -0
- fabricks/core/job_schema.py +32 -0
- fabricks/core/jobs/__init__.py +21 -0
- fabricks/core/jobs/base/__init__.py +10 -0
- fabricks/core/jobs/base/_types.py +284 -0
- fabricks/core/jobs/base/checker.py +139 -0
- fabricks/core/jobs/base/configurator.py +306 -0
- fabricks/core/jobs/base/exception.py +85 -0
- fabricks/core/jobs/base/generator.py +447 -0
- fabricks/core/jobs/base/invoker.py +206 -0
- fabricks/core/jobs/base/job.py +5 -0
- fabricks/core/jobs/base/processor.py +249 -0
- fabricks/core/jobs/bronze.py +395 -0
- fabricks/core/jobs/get_job.py +127 -0
- fabricks/core/jobs/get_job_conf.py +152 -0
- fabricks/core/jobs/get_job_id.py +31 -0
- fabricks/core/jobs/get_jobs.py +107 -0
- fabricks/core/jobs/get_schedule.py +10 -0
- fabricks/core/jobs/get_schedules.py +32 -0
- fabricks/core/jobs/gold.py +415 -0
- fabricks/core/jobs/silver.py +373 -0
- fabricks/core/masks.py +52 -0
- fabricks/core/parsers/__init__.py +12 -0
- fabricks/core/parsers/_types.py +6 -0
- fabricks/core/parsers/base.py +95 -0
- fabricks/core/parsers/decorator.py +11 -0
- fabricks/core/parsers/get_parser.py +26 -0
- fabricks/core/parsers/utils.py +69 -0
- fabricks/core/schedules/__init__.py +14 -0
- fabricks/core/schedules/diagrams.py +21 -0
- fabricks/core/schedules/generate.py +20 -0
- fabricks/core/schedules/get_schedule.py +5 -0
- fabricks/core/schedules/get_schedules.py +9 -0
- fabricks/core/schedules/process.py +9 -0
- fabricks/core/schedules/run.py +3 -0
- fabricks/core/schedules/terminate.py +6 -0
- fabricks/core/schedules/views.py +61 -0
- fabricks/core/steps/__init__.py +4 -0
- fabricks/core/steps/_types.py +7 -0
- fabricks/core/steps/base.py +423 -0
- fabricks/core/steps/get_step.py +10 -0
- fabricks/core/steps/get_step_conf.py +26 -0
- fabricks/core/udfs.py +106 -0
- fabricks/core/views.py +41 -0
- fabricks/deploy/__init__.py +92 -0
- fabricks/deploy/masks.py +8 -0
- fabricks/deploy/notebooks.py +71 -0
- fabricks/deploy/schedules.py +10 -0
- fabricks/deploy/tables.py +82 -0
- fabricks/deploy/udfs.py +19 -0
- fabricks/deploy/utils.py +36 -0
- fabricks/deploy/views.py +509 -0
- fabricks/metastore/README.md +3 -0
- fabricks/metastore/__init__.py +5 -0
- fabricks/metastore/_types.py +65 -0
- fabricks/metastore/database.py +65 -0
- fabricks/metastore/dbobject.py +66 -0
- fabricks/metastore/pyproject.toml +20 -0
- fabricks/metastore/table.py +768 -0
- fabricks/metastore/utils.py +51 -0
- fabricks/metastore/view.py +53 -0
- fabricks/utils/__init__.py +0 -0
- fabricks/utils/_types.py +6 -0
- fabricks/utils/azure_queue.py +93 -0
- fabricks/utils/azure_table.py +154 -0
- fabricks/utils/console.py +51 -0
- fabricks/utils/fdict.py +240 -0
- fabricks/utils/helpers.py +228 -0
- fabricks/utils/log.py +236 -0
- fabricks/utils/mermaid.py +32 -0
- fabricks/utils/path.py +242 -0
- fabricks/utils/pip.py +61 -0
- fabricks/utils/pydantic.py +94 -0
- fabricks/utils/read/__init__.py +11 -0
- fabricks/utils/read/_types.py +3 -0
- fabricks/utils/read/read.py +305 -0
- fabricks/utils/read/read_excel.py +5 -0
- fabricks/utils/read/read_yaml.py +33 -0
- fabricks/utils/schema/__init__.py +7 -0
- fabricks/utils/schema/get_json_schema_for_type.py +161 -0
- fabricks/utils/schema/get_schema_for_type.py +99 -0
- fabricks/utils/spark.py +76 -0
- fabricks/utils/sqlglot.py +56 -0
- fabricks/utils/write/__init__.py +8 -0
- fabricks/utils/write/delta.py +46 -0
- fabricks/utils/write/stream.py +27 -0
- fabricks-3.0.11.dist-info/METADATA +23 -0
- fabricks-3.0.11.dist-info/RECORD +176 -0
- fabricks-3.0.11.dist-info/WHEEL +4 -0
fabricks/core/udfs.py
ADDED

@@ -0,0 +1,106 @@
import importlib.util
import os
import re
from typing import Callable, List, Optional

from pyspark.sql import SparkSession

from fabricks.context import CATALOG, IS_UNITY_CATALOG, PATH_UDFS, SPARK
from fabricks.context.log import DEFAULT_LOGGER

UDFS: dict[str, Callable] = {}


def register_all_udfs(extension: Optional[str] = None):
    """
    Register all user-defined functions (UDFs).
    """
    DEFAULT_LOGGER.info("register udfs")

    for udf in get_udfs(extension=extension):
        split = udf.split(".")
        try:
            register_udf(udf=split[0], extension=split[1])
        except Exception as e:
            DEFAULT_LOGGER.exception(f"could not register udf {udf}", exc_info=e)


def get_udfs(extension: Optional[str] = None) -> List[str]:
    files = [os.path.basename(f) for f in PATH_UDFS.walk()]
    udfs = [f for f in files if not str(f).endswith("__init__.py") and not str(f).endswith(".requirements.txt")]
    if extension:
        udfs = [f for f in udfs if f.endswith(f".{extension}")]
    return udfs


def get_extension(udf: str) -> str:
    for u in get_udfs():
        r = re.compile(rf"{udf}(\.py|\.sql)")
        if re.match(r, u):
            return u.split(".")[1]

    raise ValueError(f"{udf} not found")


def is_registered(udf: str, spark: Optional[SparkSession] = None) -> bool:
    if spark is None:
        spark = SPARK
    assert spark is not None

    df = spark.sql("show user functions in default")

    if CATALOG:
        df = df.where(f"function == '{CATALOG}.default.udf_{udf}'")
    else:
        df = df.where(f"function == 'spark_catalog.default.udf_{udf}'")

    return not df.isEmpty()


def register_udf(udf: str, extension: Optional[str] = None, spark: Optional[SparkSession] = None):
    """
    Register a user-defined function (UDF).
    """
    if spark is None:
        spark = SPARK
    assert spark is not None

    if not is_registered(udf, spark):
        DEFAULT_LOGGER.debug(f"register udf {udf}")

        if extension is None:
            extension = get_extension(udf)

        assert extension

        path = PATH_UDFS.joinpath(f"{udf}.{extension}")

        if extension == "sql":
            spark.sql(path.get_sql())

        elif extension == "py":
            if not IS_UNITY_CATALOG:
                assert path.exists(), f"udf not found ({path.string})"
            else:
                DEFAULT_LOGGER.debug(f"could not check if udf exists ({path.string})")

            spec = importlib.util.spec_from_file_location(udf, path.string)
            assert spec, f"no valid udf found ({path.string})"
            assert spec.loader is not None

            mod = importlib.util.module_from_spec(spec)
            spec.loader.exec_module(mod)

            u = UDFS[udf]
            u(spark)

        else:
            raise ValueError(f"{udf} not found")


def udf(name: str):
    def decorator(fn: Callable):
        UDFS[name] = fn
        return fn

    return decorator
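
Note: register_udf imports a Python UDF file, relies on that import to populate UDFS through the udf decorator, and then calls the registered callable with the active SparkSession. A minimal sketch of what such a file under the UDFs folder presumably looks like; the name to_upper and its body are illustrative, not shipped with the package:

from pyspark.sql import SparkSession
from pyspark.sql.types import StringType

from fabricks.core.udfs import udf


@udf("to_upper")  # importing this module adds the callable to UDFS["to_upper"]
def to_upper(spark: SparkSession):
    # register_udf invokes this with the active session; the Spark function name
    # must follow the udf_<name> convention that is_registered looks for
    spark.udf.register("udf_to_upper", lambda s: s.upper() if s else None, StringType())
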
fabricks/core/views.py
ADDED

@@ -0,0 +1,41 @@
from fabricks.context import PATH_VIEWS, SPARK
from fabricks.context.log import DEFAULT_LOGGER
from fabricks.utils.path import Path
from fabricks.utils.sqlglot import fix as fix_sql


def create_or_replace_view_internal(path: Path):
    sql = path.get_sql()
    file_name = path.get_file_name().split(".")[0]

    try:
        sql = f"""
        create or replace view fabricks.{file_name}
        as
        {sql}
        """
        sql = fix_sql(sql)
        DEFAULT_LOGGER.debug("create or replace (custom) view", extra={"label": f"fabricks.{file_name}", "sql": sql})

        SPARK.sql(sql)

    except Exception as e:
        DEFAULT_LOGGER.exception(
            "could not create nor replace (custom) view", extra={"label": f"fabricks.{file_name}", "exc_info": e}
        )
        raise e


def create_or_replace_view(name: str):
    p = PATH_VIEWS.joinpath(f"{name}.sql")
    create_or_replace_view_internal(p)


def create_or_replace_views():
    DEFAULT_LOGGER.info("create or replace (custom) views")

    for p in PATH_VIEWS.walk(file_format="sql", convert=True):
        try:
            create_or_replace_view_internal(p)
        except Exception:
            pass
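
Assuming one SQL file per view in the views folder, deploying a single custom view presumably reduces to the call below; monitoring is an illustrative file name:

from fabricks.core.views import create_or_replace_view

# reads <views>/monitoring.sql and wraps it in
# "create or replace view fabricks.monitoring as ..."
create_or_replace_view("monitoring")
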
fabricks/deploy/__init__.py
ADDED

@@ -0,0 +1,92 @@
import logging
from typing import List, Optional, Union, cast

from fabricks.context import FABRICKS_STORAGE
from fabricks.context.log import DEFAULT_LOGGER
from fabricks.core.jobs.base._types import Steps, TStep
from fabricks.core.steps.base import BaseStep
from fabricks.deploy.masks import deploy_masks
from fabricks.deploy.notebooks import deploy_notebooks
from fabricks.deploy.schedules import deploy_schedules
from fabricks.deploy.tables import deploy_tables
from fabricks.deploy.udfs import deploy_udfs
from fabricks.deploy.utils import print_atomic_bomb
from fabricks.deploy.views import deploy_views
from fabricks.metastore.database import Database


class Deploy:
    @staticmethod
    def tables(drop: bool = False):
        deploy_tables(drop=drop)

    @staticmethod
    def views():
        deploy_views()

    @staticmethod
    def udfs():
        deploy_udfs()

    @staticmethod
    def masks():
        deploy_masks()

    @staticmethod
    def notebooks():
        deploy_notebooks()

    @staticmethod
    def schedules():
        deploy_schedules()

    @staticmethod
    def armageddon(steps: Optional[Union[TStep, List[TStep], str, List[str]]], nowait: bool = False):
        DEFAULT_LOGGER.warning("!💥 armageddon 💥!")
        print_atomic_bomb(nowait=nowait)

        DEFAULT_LOGGER.setLevel(logging.INFO)

        if steps is None:
            steps = Steps
        assert steps is not None

        if isinstance(steps, str):
            steps = [cast(TStep, steps)]
        elif isinstance(steps, List):
            steps = [cast(TStep, s) for s in steps]
        elif isinstance(steps, TStep):
            steps = [steps]

        fabricks = Database("fabricks")
        fabricks.drop()

        for s in steps:
            step = BaseStep(s)
            step.drop()

        tmp = FABRICKS_STORAGE.joinpath("tmp")
        tmp.rm()

        checkpoint = FABRICKS_STORAGE.joinpath("checkpoints")
        checkpoint.rm()

        schema = FABRICKS_STORAGE.joinpath("schemas")
        schema.rm()

        schedule = FABRICKS_STORAGE.joinpath("schedules")
        schedule.rm()

        fabricks.create()

        Deploy.tables(drop=True)
        Deploy.udfs()
        Deploy.masks()
        Deploy.notebooks()

        for s in steps:
            step = BaseStep(s)
            step.create()

        Deploy.views()
        Deploy.schedules()
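
A hedged sketch of driving this facade from a notebook; the step names below are illustrative, since actual step names come from the runtime configuration:

from fabricks.deploy import Deploy

Deploy.tables()  # creates fabricks.logs, fabricks.steps and fabricks.dummy if missing
Deploy.udfs()
Deploy.views()

# destructive full rebuild of the given steps (drops the fabricks database, the
# step databases and the tmp/checkpoints/schemas/schedules storage before recreating)
Deploy.armageddon(steps=["bronze", "silver", "gold"], nowait=True)
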
fabricks/deploy/notebooks.py
ADDED

@@ -0,0 +1,71 @@
import base64
import io
import os
from importlib import resources

from databricks.sdk import WorkspaceClient
from databricks.sdk.service import workspace

from fabricks.context import PATH_NOTEBOOKS
from fabricks.context.log import DEFAULT_LOGGER


def deploy_notebook(notebook: str):
    from fabricks.api import notebooks

    DEFAULT_LOGGER.debug(f"overwrite {notebook}")

    w = WorkspaceClient()

    target = f"{PATH_NOTEBOOKS}/{notebook}.py"
    src = resources.files(notebooks) / f"{notebook}.py"

    with io.open(src, "rb") as file:  # type: ignore
        content = file.read()

    encoded = base64.b64encode(content).decode("utf-8")

    w.workspace.import_(
        path=target,
        content=encoded,
        format=workspace.ImportFormat.AUTO,
        language=workspace.Language.PYTHON,
        overwrite=True,
    )


def deploy_notebooks():
    DEFAULT_LOGGER.info("overwrite notebooks")

    _create_dir_if_not_exists()
    _clean_dir()

    for n in [
        "cluster",
        "initialize",
        "process",
        "schedule",
        "run",
        "terminate",
    ]:
        deploy_notebook(notebook=n)


def _create_dir_if_not_exists():
    dir = str(PATH_NOTEBOOKS)
    os.makedirs(dir, exist_ok=True)


def _clean_dir():
    dir = str(PATH_NOTEBOOKS)
    for n in [
        "cluster",
        "initialize",
        "process",
        "schedule",
        "run",
        "terminate",
    ]:
        file_path = os.path.join(dir, f"{n}.py")
        if os.path.isfile(file_path):
            os.remove(file_path)
fabricks/deploy/schedules.py
ADDED

@@ -0,0 +1,10 @@
from fabricks.context.log import DEFAULT_LOGGER
from fabricks.core.schedules import create_or_replace_views
from fabricks.core.views import create_or_replace_views as create_or_replace_custom_views


def deploy_schedules():
    DEFAULT_LOGGER.info("create or replace schedules")

    create_or_replace_custom_views()
    create_or_replace_views()

fabricks/deploy/tables.py
ADDED

@@ -0,0 +1,82 @@
from pyspark.sql.types import LongType, StringType, StructField, StructType, TimestampType

from fabricks.cdc import NoCDC
from fabricks.context import SPARK
from fabricks.context.log import DEFAULT_LOGGER
from fabricks.metastore.table import Table


def deploy_tables(drop: bool = False):
    DEFAULT_LOGGER.info("create or replace fabricks (default) tables")

    create_table_log(drop)
    create_table_dummy(drop)
    create_table_step(drop)


def create_table_step(drop: bool = False):
    table = Table("fabricks", "steps")
    if drop:
        table.drop()

    if not table.exists():
        schema = StructType(
            [
                StructField("step", StringType(), True),
                StructField("expand", StringType(), True),
                StructField("order", LongType(), True),
            ]
        )
        table.create(schema=schema, partitioning=True, partition_by=["expand"])


def create_table_log(drop: bool = False):
    table = Table("fabricks", "logs")
    if drop:
        table.drop()

    if not table.exists():
        schema = StructType(
            [
                StructField("schedule_id", StringType(), True),
                StructField("schedule", StringType(), True),
                StructField("step", StringType(), True),
                StructField("job_id", StringType(), True),
                StructField("job", StringType(), True),
                StructField("notebook_id", StringType(), True),
                StructField("level", StringType(), True),
                StructField("status", StringType(), True),
                StructField("timestamp", TimestampType(), True),
                StructField(
                    "exception",
                    StructType(
                        [
                            StructField("type", StringType(), True),
                            StructField("message", StringType(), True),
                            StructField("traceback", StringType(), True),
                        ]
                    ),
                    True,
                ),
            ]
        )
        table.create(schema=schema, partitioning=True, partition_by=["schedule_id", "step"])


def create_table_dummy(drop: bool = False):
    cdc = NoCDC("fabricks", "dummy")

    if drop:
        cdc.drop()

    if not cdc.table.exists():
        df = SPARK.sql(
            """
            select
                1 as __key,
                md5('1') as __hash,
                cast('1900-01-01' as timestamp) as __valid_from,
                cast('9999-12-31' as timestamp) as __valid_to
            """
        )
        cdc.overwrite(df)

fabricks/deploy/udfs.py
ADDED

@@ -0,0 +1,19 @@
from fabricks.context import SPARK
from fabricks.context.log import DEFAULT_LOGGER
from fabricks.core.udfs import register_all_udfs
from fabricks.utils.sqlglot import fix as fix_sql


def deploy_udfs():
    DEFAULT_LOGGER.info("create or replace udfs")

    register_all_udfs(extension="sql")
    create_or_replace_udf_job_id()


def create_or_replace_udf_job_id():
    sql = "create or replace function fabricks.udf_job_id(job string) returns string return md5(job)"
    sql = fix_sql(sql)

    DEFAULT_LOGGER.debug("create or replace fabricks.udf_job_id", extra={"sql": sql})
    SPARK.sql(sql)
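
Once deployed, the helper is callable from SQL; a minimal check, with an illustrative job name:

from fabricks.context import SPARK

# the job id is simply md5 of the fully qualified job name
SPARK.sql("select fabricks.udf_job_id('gold.sales.orders') as job_id").show()
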
fabricks/deploy/utils.py
ADDED

@@ -0,0 +1,36 @@
import time


def print_atomic_bomb(nowait: bool = False):
    def print_and_wait(message: str):
        if not nowait:
            time.sleep(0.5)
        print(message)

    print("")
    print_and_wait(" ⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⣀⡤⠤⠴⠾⠋⠉⠛⢾⡏⠙⠿⠦⠤⢤⣀⡀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀ ")
    print_and_wait(" ⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⢀⡤⢶⣿⠉⢀⣀⡠⠆⠀⠀⠀⠀⠀⠀⠀⢤⣀⣀⠈⢹⣦⢤⡀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀ ")
    print_and_wait(" ⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⢀⣴⣿⠁⢋⡙⠁⠀⡝⠀⠀⠀⠀⣀⡸⠋⠁⠀⠀⠹⡀⠀⠈⠈⠆⢹⢦⡀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀ ")
    print_and_wait(" ⠀⠀⠀⠀⠀⠀⠀⢀⣠⣤⣿⣁⡡⣴⡏⠀⠀⠀⢀⠀⢧⣀⠄⠀⠀⠀⣀⣰⠆⢀⠁⠀⠀⢈⣶⡤⣀⢹⣦⣄⡀⠀⠀⠀⠀⠀⠀⠀⠀ ")
    print_and_wait(" ⠀⠀⠀⠀⠀⣠⢴⠟⢁⡝⠀⠁⠀⠃⠉⠀⠀⠘⣯⠀⡀⠾⣤⣄⣠⢤⠾⠄⠀⣸⠖⠀⠀⠈⠀⠃⠀⠀⠹⡄⠙⣶⢤⡀⠀⠀⠀⠀⠀ ")
    print_and_wait(" ⠀⠀⠀⣠⠾⡇⠈⣀⡞⠀⠀⠀⠀⡀⠀⢀⣠⣄⣇⠀⣳⠴⠃⠀⠀⠀⠣⢴⠉⣰⣇⣀⣀⠀⠀⡄⠀⠀⠀⢹⣄⡘⠈⡷⣦⠀⠀⠀⠀ ")
    print_and_wait(" ⢠⠞⠉⢻⡄⠀⠀⠈⠙⠀⠀⠀⠀⠙⣶⣏⣤⣤⠟⠉⠁⠀⠀⠀⠀⠀⠀⠀⠉⠙⢦⣱⣌⣷⠊⠀⠀⠀⠀⠈⠁⠀⠀⠀⡝⠉⠻⣄⠀ ")
    print_and_wait(" ⠛⢀⡠⢼⡇⠀⠀⢀⡄⠀⢀⣀⡽⠚⠁⠀⠀⠀⢠⡀⢠⣀⠠⣔⢁⡀⠀⣄⠀⡄⠀⠀⠀⠈⠑⠺⣄⡀⠀⠠⡀⠀⠀⢠⡧⠄⠀⠘⢧ ")
    print_and_wait(" ⡶⠋⠀⠀⠈⣠⣈⣩⠗⠒⠋⠀⠀⠀⠀⣀⣠⣆⡼⣷⣞⠛⠻⡉⠉⡟⠒⡛⣶⠧⣀⣀⣀⠀⠀⠀⠀⠈⠓⠺⢏⣉⣠⠋⠀⠀⠀⢢⣸ ")
    print_and_wait(" ⠇⠐⠤⠤⠖⠁⣿⣀⣀⠀⠀⠀⠀⠀⠉⠁⠈⠉⠙⠛⢿⣷⡄⢣⡼⠀⣾⣿⠧⠒⠓⠚⠛⠉⠀⠀⠀⠀⠀⢀⣀⣾⡉⠓⠤⡤⠄⠸⢿ ")
    print_and_wait(" ⣆⣤⠀⠀⠠⠀⠈⠓⠈⠓⠤⡀⠀⠀⠀⠀⠀⠀⠀⠀⠈⣿⣿⢸⠀⢸⣿⠇⠀⠀⠀⠀⠀⠀⠀⠀⢀⡤⠒⠁⠰⠃⠀⠠⠀⠀⢀⣀⠞ ")
    print_and_wait(" ⠀⠉⠓⢲⣄⡈⢀⣠⠀⠀⠀⡸⠶⠂⠀⠀⢀⠀⠀⠤⠞⢻⡇⠀⠀⢘⡟⠑⠤⠄⠀⢀⠀⠀⠐⠲⢿⡀⠀⠀⢤⣀⢈⣀⡴⠖⠋⠀⠀ ")
    print_and_wait(" ⠀⠀⠀⠀⠈⠉⠉⠙⠓⠒⣾⣁⣀⣴⠀⣀⠙⢧⠂⢀⣆⣀⣷⣤⣀⣾⣇⣀⡆⠀⢢⠛⢁⠀⢰⣀⣀⣹⠒⠒⠛⠉⠉⠉⠀⠀⠀⠀⠀ ")
    print_and_wait(" ⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠉⠁⠈⠉⠉⠛⠉⠙⠉⠀⠀⣿⡟⣿⣿⠀⠀⠈⠉⠉⠙⠋⠉⠉⠀⠉⠁⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀ ")
    print_and_wait(" ⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⣿⡇⢻⣿⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀ ")
    print_and_wait(" ⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⣀⣤⣶⣾⣿⣿⠁⠀⢹⡛⣟⡶⢤⣀⡀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀ ")
    print_and_wait(" ⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⣴⠛⢯⣽⡟⢿⣿⠛⠿⠳⠞⠻⣿⠻⣆⢽⠟⣶⡀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀ ")
    print_and_wait(" ⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠛⠃⠲⠯⠴⣦⣼⣷⣤⣤⣶⣤⣩⡧⠽⠷⠐⠛⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀ ")
    print_and_wait(" ⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⢀⣿⡇⠀⣿⡆⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀ ")
    print_and_wait(" ⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⢀⣀⣄⡀⢀⣀⣠⡾⡿⢡⢐⠻⣿⣄⣀⡀⠀⣀⣄⡀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀ ")
    print_and_wait(" ⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⢀⣤⢴⡏⠁⠀⠝⠉⣡⠟⣰⠃⢸⣿⠀⣷⠙⢧⡉⠻⡅⠀⠙⡷⢤⣀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀ ")
    print_and_wait(" ⠀⠀⠀⠀⠀⠀⠀⠀⠀⢀⣴⡟⠀⠈⣿⢄⡴⠞⠻⣄⣰⣡⠤⣞⣸⡤⢬⣧⣀⡿⠛⠦⣤⣶⡃⠀⢹⣦⡀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀ ")
    print_and_wait(" ⠀⠀⠀⠀⠀⠀⢀⣴⣶⡿⠃⠉⢺⠁⠙⠒⠀⠀⣠⡉⠀⠉⠚⠉⠉⠑⠈⠀⠈⣧⠀⠀⠒⠋⠀⡹⠋⠀⢻⡶⠶⡄⠀⠀⠀⠀⠀⠀⠀ ")
    print_and_wait(" ⠀⠀⠀⠀⠀⣠⣾⣿⣇⠁⢈⡦⠀⡍⠋⠁⡀⠸⡋⠀⠀⠀⢘⠏⠉⡏⠀⠀⠀⢉⡷⠀⡌⠉⠋⡇⠠⣏⠈⢁⣦⣿⣦⠀⠀⠀⠀⠀⠀ ")
    print_and_wait(" ⠀⠀⠀⠀⠀⠉⣁⠀⠉⠉⠉⠙⠛⠛⠒⠚⠳⠤⢼⣤⣠⠤⣮⣠⣤⣼⠦⢤⣤⣿⠤⠾⠓⠒⠛⢓⠛⠉⠉⠉⠀⠈⠉⠀⠀⠀⠀⠀⠀ ")
    print("")